You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/04/11 09:46:40 UTC

svn commit: r1672842 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/ lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/ lucene/benchmark/ lucene/benchmark/src/java/org/a...

Author: mikemccand
Date: Sat Apr 11 07:46:39 2015
New Revision: 1672842

URL: http://svn.apache.org/r1672842
Log:
LUCENE-5989: allow passing BytesRef to StringField to make it easier to index arbitrary binary tokens

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
    lucene/dev/branches/branch_5x/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41StoredFieldsReader.java
    lucene/dev/branches/branch_5x/lucene/benchmark/   (props changed)
    lucene/dev/branches/branch_5x/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
    lucene/dev/branches/branch_5x/lucene/codecs/   (props changed)
    lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/Field.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/StringField.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/document/TestField.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestLiveFieldValues.java
    lucene/dev/branches/branch_5x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
    lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
    lucene/dev/branches/branch_5x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/document/TestLazyDocument.java
    lucene/dev/branches/branch_5x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterLeafReader.java
    lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/MismatchedLeafReader.java
    lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Sat Apr 11 07:46:39 2015
@@ -29,6 +29,11 @@ New Features
   accuracy of SDV. Includes optimized Intersect predicate to avoid many
   geometry checks. Uses TwoPhaseIterator. (David Smiley)
 
+* LUCENE-5989: Allow passing BytesRef to StringField to make it easier
+  to index arbitrary binary tokens, and change the experimental
+  StoredFieldVisitor.stringField API to take UTF-8 byte[] instead of
+  String (Mike McCandless)
+
 Optimizations
 
 * LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if

Modified: lucene/dev/branches/branch_5x/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java Sat Apr 11 07:46:39 2015
@@ -227,7 +227,7 @@ final class Lucene40StoredFieldsReader e
       if ((bits & FIELD_IS_BINARY) != 0) {
         visitor.binaryField(info, bytes);
       } else {
-        visitor.stringField(info, new String(bytes, 0, bytes.length, StandardCharsets.UTF_8));
+        visitor.stringField(info, bytes);
       }
     }
   }

Modified: lucene/dev/branches/branch_5x/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41StoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41StoredFieldsReader.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41StoredFieldsReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene41/Lucene41StoredFieldsReader.java Sat Apr 11 07:46:39 2015
@@ -220,7 +220,7 @@ final class Lucene41StoredFieldsReader e
         length = in.readVInt();
         data = new byte[length];
         in.readBytes(data, 0, length);
-        visitor.stringField(info, new String(data, StandardCharsets.UTF_8));
+        visitor.stringField(info, data);
         break;
       case NUMERIC_INT:
         visitor.intField(info, in.readInt());

Modified: lucene/dev/branches/branch_5x/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java (original)
+++ lucene/dev/branches/branch_5x/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java Sat Apr 11 07:46:39 2015
@@ -17,6 +17,7 @@
 package org.apache.lucene.benchmark.quality.utils;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -50,7 +51,8 @@ public class DocNameExtractor {
     final List<String> name = new ArrayList<>();
     searcher.getIndexReader().document(docid, new StoredFieldVisitor() {
         @Override
-        public void stringField(FieldInfo fieldInfo, String value) {
+        public void stringField(FieldInfo fieldInfo, byte[] bytes) {
+          String value = new String(bytes, StandardCharsets.UTF_8);
           name.add(value);
         }
 

Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java Sat Apr 11 07:46:39 2015
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simplet
  */
 
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 import java.util.Collection;
 import java.util.Collections;
 
@@ -157,7 +156,9 @@ public class SimpleTextStoredFieldsReade
     readLine();
     assert StringHelper.startsWith(scratch.get(), VALUE);
     if (type == TYPE_STRING) {
-      visitor.stringField(fieldInfo, new String(scratch.bytes(), VALUE.length, scratch.length()-VALUE.length, StandardCharsets.UTF_8));
+      byte[] bytes = new byte[scratch.length() - VALUE.length];
+      System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
+      visitor.stringField(fieldInfo, bytes);
     } else if (type == TYPE_BINARY) {
       byte[] copy = new byte[scratch.length()-VALUE.length];
       System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java Sat Apr 11 07:46:39 2015
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
 import java.io.Closeable;
 import java.io.IOException;
 import java.io.Reader;
+import java.nio.charset.StandardCharsets;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@@ -142,14 +143,16 @@ public abstract class StoredFieldsWriter
     @Override
     public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
       reset(fieldInfo);
+      // TODO: can we avoid new BR here?
       binaryValue = new BytesRef(value);
       write();
     }
 
     @Override
-    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+    public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
       reset(fieldInfo);
-      stringValue = value;
+      // TODO: can we avoid new String here?
+      stringValue = new String(value, StandardCharsets.UTF_8);
       write();
     }
 

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Sat Apr 11 07:46:39 2015
@@ -41,7 +41,6 @@ import static org.apache.lucene.codecs.c
 
 import java.io.EOFException;
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
@@ -220,7 +219,7 @@ public final class CompressingStoredFiel
         length = in.readVInt();
         data = new byte[length];
         in.readBytes(data, 0, length);
-        visitor.stringField(info, new String(data, StandardCharsets.UTF_8));
+        visitor.stringField(info, data);
         break;
       case NUMERIC_INT:
         visitor.intField(info, in.readZInt());

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java Sat Apr 11 07:46:39 2015
@@ -18,8 +18,9 @@ package org.apache.lucene.document;
  */
 
 import java.io.IOException;
-import java.util.Set;
+import java.nio.charset.StandardCharsets;
 import java.util.HashSet;
+import java.util.Set;
 
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
@@ -65,12 +66,12 @@ public class DocumentStoredFieldVisitor
   }
 
   @Override
-  public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+  public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
     final FieldType ft = new FieldType(TextField.TYPE_STORED);
     ft.setStoreTermVectors(fieldInfo.hasVectors());
     ft.setOmitNorms(fieldInfo.omitsNorms());
     ft.setIndexOptions(fieldInfo.getIndexOptions());
-    doc.add(new Field(fieldInfo.name, value, ft));
+    doc.add(new Field(fieldInfo.name, new String(value, StandardCharsets.UTF_8), ft));
   }
 
   @Override

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/Field.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/Field.java Sat Apr 11 07:46:39 2015
@@ -25,12 +25,14 @@ import org.apache.lucene.analysis.Numeri
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.FieldType.NumericType;
 import org.apache.lucene.index.FieldInvertState; // javadocs
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexWriter; // javadocs
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.BytesRef;
 
 /**
@@ -213,9 +215,6 @@ public class Field implements IndexableF
     if (bytes == null) {
       throw new IllegalArgumentException("bytes cannot be null");
     }
-    if (type.indexOptions() != IndexOptions.NONE) {
-      throw new IllegalArgumentException("Fields with BytesRef values cannot be indexed");
-    }
     this.fieldsData = bytes;
     this.type = type;
     this.name = name;
@@ -536,16 +535,25 @@ public class Field implements IndexableF
     }
 
     if (!fieldType().tokenized()) {
-      if (stringValue() == null) {
+      if (stringValue() != null) {
+        if (!(reuse instanceof StringTokenStream)) {
+          // lazy init the TokenStream as it is heavy to instantiate
+          // (attributes,...) if not needed
+          reuse = new StringTokenStream();
+        }
+        ((StringTokenStream) reuse).setValue(stringValue());
+        return reuse;
+      } else if (binaryValue() != null) {
+        if (!(reuse instanceof BinaryTokenStream)) {
+          // lazy init the TokenStream as it is heavy to instantiate
+          // (attributes,...) if not needed
+          reuse = new BinaryTokenStream();
+        }
+        ((BinaryTokenStream) reuse).setValue(binaryValue());
+        return reuse;
+      } else {
         throw new IllegalArgumentException("Non-Tokenized Fields must have a String value");
       }
-      if (!(reuse instanceof StringTokenStream)) {
-        // lazy init the TokenStream as it is heavy to instantiate
-        // (attributes,...) if not needed (stored field loading)
-        reuse = new StringTokenStream();
-      }
-      ((StringTokenStream) reuse).setValue(stringValue());
-      return reuse;
     }
 
     if (tokenStream != null) {
@@ -559,7 +567,69 @@ public class Field implements IndexableF
     throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value; got " + this);
   }
   
-  static final class StringTokenStream extends TokenStream {
+  private static final class BinaryTokenStream extends TokenStream {
+    private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+
+    // Do not init this to true, because caller must first call reset:
+    private boolean available;
+  
+    public BinaryTokenStream() {
+    }
+
+    public void setValue(BytesRef value) {
+      bytesAtt.setBytesRef(value);
+    }
+  
+    @Override
+    public boolean incrementToken() {
+      if (available) {
+        clearAttributes();
+        available = false;
+        return true;
+      }
+      return false;
+    }
+  
+    @Override
+    public void reset() {
+      available = true;
+    }
+  
+    public interface ByteTermAttribute extends TermToBytesRefAttribute {
+      public void setBytesRef(BytesRef bytes);
+    }
+  
+    public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
+      private BytesRef bytes;
+    
+      @Override
+      public void fillBytesRef() {
+        // no-op: the bytes were already set by our owner's setValue
+      }
+    
+      @Override
+      public BytesRef getBytesRef() {
+        return bytes;
+      }
+
+      @Override
+      public void setBytesRef(BytesRef bytes) {
+        this.bytes = bytes;
+      }
+    
+      @Override
+      public void clear() {
+      }
+    
+      @Override
+      public void copyTo(AttributeImpl target) {
+        ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
+        other.bytes = bytes;
+      }
+    }
+  }
+
+  private static final class StringTokenStream extends TokenStream {
     private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
     private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
     private boolean used = false;

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/StringField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/StringField.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/StringField.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/document/StringField.java Sat Apr 11 07:46:39 2015
@@ -18,6 +18,7 @@ package org.apache.lucene.document;
  */
 
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.util.BytesRef;
 
 /** A field that is indexed but not tokenized: the entire
  *  String value is indexed as a single token.  For example
@@ -48,7 +49,9 @@ public final class StringField extends F
     TYPE_STORED.freeze();
   }
 
-  /** Creates a new StringField. 
+  /** Creates a new textual StringField, indexing the provided String value
+   *  as a single token.
+   *
    *  @param name field name
    *  @param value String value
    *  @param stored Store.YES if the content should also be stored
@@ -57,4 +60,18 @@ public final class StringField extends F
   public StringField(String name, String value, Store stored) {
     super(name, value, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
   }
+
+  /** Creates a new binary StringField, indexing the provided binary (BytesRef)
+   *  value as a single token.
+   *
+   *  @param name field name
+   *  @param value BytesRef value.  The provided value is not cloned so
+   *         you must not change it until the document(s) holding it
+   *         have been indexed.
+   *  @param stored Store.YES if the content should also be stored
+   *  @throws IllegalArgumentException if the field name or value is null.
+   */
+  public StringField(String name, BytesRef value, Store stored) {
+    super(name, value, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
+  }
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java Sat Apr 11 07:46:39 2015
@@ -53,8 +53,8 @@ public abstract class StoredFieldVisitor
   public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
   }
 
-  /** Process a string field */
-  public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+  /** Process a string field; the provided byte[] value is a UTF-8 encoded string value. */
+  public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
   }
 
   /** Process a int numeric field. */
@@ -93,4 +93,4 @@ public abstract class StoredFieldVisitor
     /** STOP: don't visit this field and stop processing any other fields for this document. */
     STOP
   }
-}
\ No newline at end of file
+}

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/document/TestField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/document/TestField.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/document/TestField.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/document/TestField.java Sat Apr 11 07:46:39 2015
@@ -22,6 +22,13 @@ import java.nio.charset.StandardCharsets
 
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 
@@ -408,6 +415,28 @@ public class TestField extends LuceneTes
     
     assertEquals(5L, field.numericValue().longValue());
   }
+
+  public void testIndexedBinaryField() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    BytesRef br = new BytesRef(new byte[5]);
+    Field field = new StringField("binary", br, Field.Store.YES);
+    assertEquals(br, field.binaryValue());
+    doc.add(field);
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+
+    IndexSearcher s = newSearcher(r);
+    TopDocs hits = s.search(new TermQuery(new Term("binary", br)), 1);
+    assertEquals(1, hits.totalHits);
+    Document storedDoc = s.doc(hits.scoreDocs[0].doc);
+    assertEquals(br, storedDoc.getField("binary").binaryValue());
+
+    r.close();
+    w.close();
+    dir.close();
+  }
   
   private void trySetByteValue(Field f) {
     try {

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java Sat Apr 11 07:46:39 2015
@@ -213,8 +213,9 @@ public class TestRollingUpdates extends
         DirectoryReader open = null;
         for (int i = 0; i < num; i++) {
           Document doc = new Document();// docs.nextDoc();
-          doc.add(newStringField("id", "test", Field.Store.NO));
-          writer.updateDocument(new Term("id", "test"), doc);
+          BytesRef br = new BytesRef("test");
+          doc.add(newStringField("id", br, Field.Store.NO));
+          writer.updateDocument(new Term("id", br), doc);
           if (random().nextInt(3) == 0) {
             if (open == null) {
               open = DirectoryReader.open(writer, true);

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestLiveFieldValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestLiveFieldValues.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestLiveFieldValues.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestLiveFieldValues.java Sat Apr 11 07:46:39 2015
@@ -31,12 +31,12 @@ import org.apache.lucene.analysis.MockAn
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.IntField;
-import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
@@ -107,7 +107,7 @@ public class TestLiveFieldValues extends
                 if (threadRandom.nextDouble() <= addChance) {
                   String id = String.format(Locale.ROOT, "%d_%04x", threadID, threadRandom.nextInt(idCount));
                   Integer field = threadRandom.nextInt(Integer.MAX_VALUE);
-                  doc.add(new StringField("id", id, Field.Store.YES));
+                  doc.add(newStringField("id", new BytesRef(id), Field.Store.YES));
                   doc.add(new IntField("field", field.intValue(), Field.Store.YES));
                   w.updateDocument(new Term("id", id), doc);
                   rt.add(id, field);
@@ -118,7 +118,7 @@ public class TestLiveFieldValues extends
 
                 if (allIDs.size() > 0 && threadRandom.nextDouble() <= deleteChance) {
                   String randomID = allIDs.get(threadRandom.nextInt(allIDs.size()));
-                  w.deleteDocuments(new Term("id", randomID));
+                  w.deleteDocuments(new Term("id", new BytesRef(randomID)));
                   rt.delete(randomID);
                   values.put(randomID, missing);
                 }

Modified: lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Sat Apr 11 07:46:39 2015
@@ -18,6 +18,7 @@ package org.apache.lucene.search.posting
  */
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.text.BreakIterator;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -792,7 +793,8 @@ public class PostingsHighlighter {
     }
     
     @Override
-    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+    public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
+      String value = new String(bytes, StandardCharsets.UTF_8);
       assert currentField >= 0;
       StringBuilder builder = builders[currentField];
       if (builder.length() > 0 && builder.length() < maxLength) {

Modified: lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (original)
+++ lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java Sat Apr 11 07:46:39 2015
@@ -17,6 +17,16 @@ package org.apache.lucene.search.vectorh
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
@@ -25,19 +35,10 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.highlight.Encoder;
-import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
 import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
 import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
 /**
  * Base FragmentsBuilder implementation that supports colored pre/post
  * tags and multivalued fields.
@@ -152,7 +153,8 @@ public abstract class BaseFragmentsBuild
     reader.document(docId, new StoredFieldVisitor() {
         
         @Override
-        public void stringField(FieldInfo fieldInfo, String value) {
+        public void stringField(FieldInfo fieldInfo, byte[] bytes) {
+          String value = new String(bytes, StandardCharsets.UTF_8);
           FieldType ft = new FieldType(TextField.TYPE_STORED);
           ft.setStoreTermVectors(fieldInfo.hasVectors());
           fields.add(new Field(fieldInfo.name, value, ft));

Modified: lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/document/TestLazyDocument.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/document/TestLazyDocument.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/document/TestLazyDocument.java (original)
+++ lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/document/TestLazyDocument.java Sat Apr 11 07:46:39 2015
@@ -16,19 +16,19 @@
  */
 package org.apache.lucene.document;
 
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
-import java.util.Set;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
-import java.util.HashMap;
-import java.io.IOException;
+import java.util.Set;
 
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.store.*;
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.index.*;
 import org.apache.lucene.search.*;
-
+import org.apache.lucene.store.*;
+import org.apache.lucene.util.LuceneTestCase;
 import org.junit.After;
 import org.junit.Before;
 
@@ -209,7 +209,8 @@ public class TestLazyDocument extends Lu
     }
 
     @Override
-    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+    public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
+      String value = new String(bytes, StandardCharsets.UTF_8);
       final FieldType ft = new FieldType(TextField.TYPE_STORED);
       ft.setStoreTermVectors(fieldInfo.hasVectors());
       ft.setOmitNorms(fieldInfo.omitsNorms());

Modified: lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterLeafReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterLeafReader.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterLeafReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterLeafReader.java Sat Apr 11 07:46:39 2015
@@ -78,7 +78,7 @@ public final class FieldFilterLeafReader
       }
 
       @Override
-      public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+      public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
         visitor.stringField(fieldInfo, value);
       }
 

Modified: lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/MismatchedLeafReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/MismatchedLeafReader.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/MismatchedLeafReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/index/MismatchedLeafReader.java Sat Apr 11 07:46:39 2015
@@ -92,7 +92,7 @@ public class MismatchedLeafReader extend
     }
 
     @Override
-    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+    public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
       in.stringField(renumber(fieldInfo), value);
     }
 

Modified: lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java Sat Apr 11 07:46:39 2015
@@ -56,50 +56,25 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.logging.Logger;
 
-import com.carrotsearch.randomizedtesting.JUnit4MethodProvider;
-import com.carrotsearch.randomizedtesting.LifecycleScope;
-import com.carrotsearch.randomizedtesting.MixWithSuiteName;
-import com.carrotsearch.randomizedtesting.RandomizedContext;
-import com.carrotsearch.randomizedtesting.RandomizedRunner;
-import com.carrotsearch.randomizedtesting.RandomizedTest;
-import com.carrotsearch.randomizedtesting.annotations.Listeners;
-import com.carrotsearch.randomizedtesting.annotations.SeedDecorators;
-import com.carrotsearch.randomizedtesting.annotations.TestGroup;
-import com.carrotsearch.randomizedtesting.annotations.TestMethodProviders;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup.Group;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
-import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
-import com.carrotsearch.randomizedtesting.generators.RandomPicks;
-import com.carrotsearch.randomizedtesting.rules.NoClassHooksShadowingRule;
-import com.carrotsearch.randomizedtesting.rules.NoInstanceHooksOverridesRule;
-import com.carrotsearch.randomizedtesting.rules.StaticFieldsInvariantRule;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.IndexReader.ReaderClosedListener;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
 import org.apache.lucene.search.AssertingIndexSearcher;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LRUQueryCache;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryCache;
 import org.apache.lucene.search.QueryCachingPolicy;
-import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.QueryUtils.FCInvisibleMultiReader;
 import org.apache.lucene.store.BaseDirectoryWrapper;
 import org.apache.lucene.store.Directory;
@@ -109,8 +84,8 @@ import org.apache.lucene.store.FlushInfo
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.LockFactory;
 import org.apache.lucene.store.MergeInfo;
-import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
+import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.NRTCachingDirectory;
 import org.apache.lucene.store.RawDirectoryWrapper;
 import org.apache.lucene.util.automaton.AutomatonTestUtil;
@@ -127,6 +102,31 @@ import org.junit.Test;
 import org.junit.rules.RuleChain;
 import org.junit.rules.TestRule;
 import org.junit.runner.RunWith;
+import com.carrotsearch.randomizedtesting.JUnit4MethodProvider;
+import com.carrotsearch.randomizedtesting.LifecycleScope;
+import com.carrotsearch.randomizedtesting.MixWithSuiteName;
+import com.carrotsearch.randomizedtesting.RandomizedContext;
+import com.carrotsearch.randomizedtesting.RandomizedRunner;
+import com.carrotsearch.randomizedtesting.RandomizedTest;
+import com.carrotsearch.randomizedtesting.annotations.Listeners;
+import com.carrotsearch.randomizedtesting.annotations.SeedDecorators;
+import com.carrotsearch.randomizedtesting.annotations.TestGroup;
+import com.carrotsearch.randomizedtesting.annotations.TestMethodProviders;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup.Group;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
+import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
+import com.carrotsearch.randomizedtesting.generators.RandomPicks;
+import com.carrotsearch.randomizedtesting.rules.NoClassHooksShadowingRule;
+import com.carrotsearch.randomizedtesting.rules.NoInstanceHooksOverridesRule;
+import com.carrotsearch.randomizedtesting.rules.StaticFieldsInvariantRule;
 
 import static com.carrotsearch.randomizedtesting.RandomizedTest.systemPropertyAsBoolean;
 import static com.carrotsearch.randomizedtesting.RandomizedTest.systemPropertyAsInt;
@@ -1337,6 +1337,10 @@ public abstract class LuceneTestCase ext
     return newField(random(), name, value, stored == Store.YES ? StringField.TYPE_STORED : StringField.TYPE_NOT_STORED);
   }
 
+  public static Field newStringField(String name, BytesRef value, Store stored) {
+    return newField(random(), name, value, stored == Store.YES ? StringField.TYPE_STORED : StringField.TYPE_NOT_STORED);
+  }
+
   public static Field newTextField(String name, String value, Store stored) {
     return newField(random(), name, value, stored == Store.YES ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED);
   }
@@ -1344,6 +1348,10 @@ public abstract class LuceneTestCase ext
   public static Field newStringField(Random random, String name, String value, Store stored) {
     return newField(random, name, value, stored == Store.YES ? StringField.TYPE_STORED : StringField.TYPE_NOT_STORED);
   }
+
+  public static Field newStringField(Random random, String name, BytesRef value, Store stored) {
+    return newField(random, name, value, stored == Store.YES ? StringField.TYPE_STORED : StringField.TYPE_NOT_STORED);
+  }
   
   public static Field newTextField(Random random, String name, String value, Store stored) {
     return newField(random, name, value, stored == Store.YES ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED);
@@ -1373,7 +1381,7 @@ public abstract class LuceneTestCase ext
   // write-once schema sort of helper class then we can
   // remove the sync here.  We can also fold the random
   // "enable norms" (now commented out, below) into that:
-  public synchronized static Field newField(Random random, String name, String value, FieldType type) {
+  public synchronized static Field newField(Random random, String name, Object value, FieldType type) {
 
     // Defeat any consumers that illegally rely on intern'd
     // strings (we removed this from Lucene a while back):
@@ -1389,7 +1397,7 @@ public abstract class LuceneTestCase ext
         type = mergeTermVectorOptions(type, prevType);
       }
 
-      return new Field(name, value, type);
+      return createField(name, value, type);
     }
 
     // TODO: once all core & test codecs can index
@@ -1435,7 +1443,17 @@ public abstract class LuceneTestCase ext
     }
     */
     
-    return new Field(name, value, newType);
+    return createField(name, value, newType);
+  }
+
+  private static Field createField(String name, Object value, FieldType fieldType) {
+    if (value instanceof String) {
+      return new Field(name, (String) value, fieldType);
+    } else if (value instanceof BytesRef) {
+      return new Field(name, (BytesRef) value, fieldType);
+    } else {
+      throw new IllegalArgumentException("value must be String or BytesRef");
+    }
   }
 
   /** 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java Sat Apr 11 07:46:39 2015
@@ -1,6 +1,7 @@
 package org.apache.solr.handler.component;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -275,8 +276,8 @@ public class TermVectorComponent extends
     // once we find it...
     final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
       @Override 
-      public void stringField(FieldInfo fieldInfo, String value) {
-        uniqValues.add(value);
+      public void stringField(FieldInfo fieldInfo, byte[] bytes) {
+        uniqValues.add(new String(bytes, StandardCharsets.UTF_8));
       }
 
       @Override 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=1672842&r1=1672841&r2=1672842&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java Sat Apr 11 07:46:39 2015
@@ -20,6 +20,7 @@ package org.apache.solr.search;
 import java.io.Closeable;
 import java.io.IOException;
 import java.net.URL;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -79,10 +80,10 @@ import org.apache.solr.core.SolrInfoMBea
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrRequestInfo;
-import org.apache.solr.search.facet.UnInvertedField;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.facet.UnInvertedField;
 import org.apache.solr.search.stats.StatsSource;
 import org.apache.solr.update.SolrIndexConfig;
 import org.slf4j.Logger;
@@ -617,7 +618,8 @@ public class SolrIndexSearcher extends I
     }
 
     @Override
-    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+    public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
+      String value = new String(bytes, StandardCharsets.UTF_8);
       final FieldType ft = new FieldType(TextField.TYPE_STORED);
       ft.setStoreTermVectors(fieldInfo.hasVectors());
       ft.setOmitNorms(fieldInfo.omitsNorms());
@@ -707,7 +709,7 @@ public class SolrIndexSearcher extends I
               throw new AssertionError();
             }
           } else {
-            visitor.stringField(info, f.stringValue());
+            visitor.stringField(info, f.stringValue().getBytes(StandardCharsets.UTF_8));
           }
           break;
         case NO: