You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2019/01/09 02:50:04 UTC
[1/3] lucene-solr:branch_8x: SOLR-12983: JavabinLoader should avoid
creating String Objects and create UTF8CharSequence fields from byte[]
Repository: lucene-solr
Updated Branches:
refs/heads/branch_8x 28859fe65 -> 0d4c81f2f
SOLR-12983: JavabinLoader should avoid creating String Objects and create UTF8CharSequence fields from byte[]
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/507a96e4
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/507a96e4
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/507a96e4
Branch: refs/heads/branch_8x
Commit: 507a96e4181d4151d36332d46dd51e7ca5a09f90
Parents: 28859fe
Author: Noble Paul <no...@apache.org>
Authored: Tue Jan 8 15:27:48 2019 +1100
Committer: Noble Paul <no...@apache.org>
Committed: Wed Jan 9 13:49:05 2019 +1100
----------------------------------------------------------------------
solr/CHANGES.txt | 2 +
.../org/apache/solr/schema/DatePointField.java | 4 +-
.../apache/solr/schema/DoublePointField.java | 2 +-
.../org/apache/solr/schema/FloatPointField.java | 2 +-
.../org/apache/solr/schema/IntPointField.java | 4 +-
.../org/apache/solr/schema/LongPointField.java | 4 +-
.../org/apache/solr/schema/TrieDateField.java | 2 +-
.../org/apache/solr/schema/TrieDoubleField.java | 2 +-
.../org/apache/solr/schema/TrieFloatField.java | 2 +-
.../org/apache/solr/schema/TrieIntField.java | 4 +-
.../org/apache/solr/schema/TrieLongField.java | 4 +-
.../org/apache/solr/update/DocumentBuilder.java | 2 +
.../request/JavaBinUpdateRequestCodec.java | 272 +++++++++++--------
.../common/util/ByteArrayUtf8CharSequence.java | 65 +++--
.../org/apache/solr/common/util/BytesBlock.java | 65 +++++
.../apache/solr/common/util/JavaBinCodec.java | 69 +++--
.../solr/common/util/Utf8CharSequence.java | 21 +-
.../solr/common/util/Utf8CharSequenceTest.java | 41 +++
18 files changed, 402 insertions(+), 165 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 8978b56..6d83c6e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -252,6 +252,8 @@ Improvements
`solr.max.booleanClauses` sysprop is specified, that will override the 1024 default. This enables users to
update this property across the board more easily. (Jason Gerlowski)
+* SOLR-12983: JavabinLoader should avoid creating String Objects and create UTF8CharSequence fields from byte[] (noble)
+
Other Changes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/core/src/java/org/apache/solr/schema/DatePointField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/DatePointField.java b/solr/core/src/java/org/apache/solr/schema/DatePointField.java
index 2bbe4ad..9360aa4 100644
--- a/solr/core/src/java/org/apache/solr/schema/DatePointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/DatePointField.java
@@ -109,8 +109,8 @@ public class DatePointField extends PointField implements DateValueFieldType {
@Override
public Object toNativeType(Object val) {
- if (val instanceof String) {
- return DateMathParser.parseMath(null, (String) val);
+ if (val instanceof CharSequence) {
+ return DateMathParser.parseMath(null, val.toString());
}
return super.toNativeType(val);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/core/src/java/org/apache/solr/schema/DoublePointField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/DoublePointField.java b/solr/core/src/java/org/apache/solr/schema/DoublePointField.java
index 3b68ece..042dd54 100644
--- a/solr/core/src/java/org/apache/solr/schema/DoublePointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/DoublePointField.java
@@ -49,7 +49,7 @@ public class DoublePointField extends PointField implements DoubleValueFieldType
public Object toNativeType(Object val) {
if (val == null) return null;
if (val instanceof Number) return ((Number) val).doubleValue();
- if (val instanceof String) return Double.parseDouble((String) val);
+ if (val instanceof CharSequence) return Double.parseDouble( val.toString());
return super.toNativeType(val);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/core/src/java/org/apache/solr/schema/FloatPointField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/FloatPointField.java b/solr/core/src/java/org/apache/solr/schema/FloatPointField.java
index 68155f4..b01a629 100644
--- a/solr/core/src/java/org/apache/solr/schema/FloatPointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/FloatPointField.java
@@ -49,7 +49,7 @@ public class FloatPointField extends PointField implements FloatValueFieldType {
public Object toNativeType(Object val) {
if (val == null) return null;
if (val instanceof Number) return ((Number) val).floatValue();
- if (val instanceof String) return Float.parseFloat((String) val);
+ if (val instanceof CharSequence) return Float.parseFloat(val.toString());
return super.toNativeType(val);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/core/src/java/org/apache/solr/schema/IntPointField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/IntPointField.java b/solr/core/src/java/org/apache/solr/schema/IntPointField.java
index a43639c..84a9a78 100644
--- a/solr/core/src/java/org/apache/solr/schema/IntPointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/IntPointField.java
@@ -49,9 +49,9 @@ public class IntPointField extends PointField implements IntValueFieldType {
if (val == null) return null;
if (val instanceof Number) return ((Number) val).intValue();
try {
- if (val instanceof String) return Integer.parseInt((String) val);
+ if (val instanceof CharSequence) return Integer.parseInt( val.toString());
} catch (NumberFormatException e) {
- Float v = Float.parseFloat((String) val);
+ Float v = Float.parseFloat(val.toString());
return v.intValue();
}
return super.toNativeType(val);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/core/src/java/org/apache/solr/schema/LongPointField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/LongPointField.java b/solr/core/src/java/org/apache/solr/schema/LongPointField.java
index d5a5072..83a6ed3 100644
--- a/solr/core/src/java/org/apache/solr/schema/LongPointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/LongPointField.java
@@ -48,9 +48,9 @@ public class LongPointField extends PointField implements LongValueFieldType {
if (val == null) return null;
if (val instanceof Number) return ((Number) val).longValue();
try {
- if (val instanceof String) return Long.parseLong((String) val);
+ if (val instanceof CharSequence) return Long.parseLong(val.toString());
} catch (NumberFormatException e) {
- Double v = Double.parseDouble((String) val);
+ Double v = Double.parseDouble(val.toString());
return v.longValue();
}
return super.toNativeType(val);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/core/src/java/org/apache/solr/schema/TrieDateField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/TrieDateField.java b/solr/core/src/java/org/apache/solr/schema/TrieDateField.java
index 5c87205..f52be51 100644
--- a/solr/core/src/java/org/apache/solr/schema/TrieDateField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieDateField.java
@@ -96,7 +96,7 @@ public class TrieDateField extends TrieField implements DateValueFieldType {
@Override
public Object toNativeType(Object val) {
- if (val instanceof String) {
+ if (val instanceof CharSequence) {
- return DateMathParser.parseMath(null, (String)val);
+ return DateMathParser.parseMath(null, val.toString());
}
return super.toNativeType(val);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java b/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java
index 892250e..8e622c6 100644
--- a/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java
@@ -61,7 +61,7 @@ public class TrieDoubleField extends TrieField implements DoubleValueFieldType {
public Object toNativeType(Object val) {
if(val==null) return null;
if (val instanceof Number) return ((Number) val).doubleValue();
- if (val instanceof String) return Double.parseDouble((String) val);
+ if (val instanceof CharSequence) return Double.parseDouble(val.toString());
return super.toNativeType(val);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java b/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java
index 20995e4..b789564 100644
--- a/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java
@@ -61,7 +61,7 @@ public class TrieFloatField extends TrieField implements FloatValueFieldType {
public Object toNativeType(Object val) {
if(val==null) return null;
if (val instanceof Number) return ((Number) val).floatValue();
- if (val instanceof String) return Float.parseFloat((String) val);
+ if (val instanceof CharSequence) return Float.parseFloat(val.toString());
return super.toNativeType(val);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/core/src/java/org/apache/solr/schema/TrieIntField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/TrieIntField.java b/solr/core/src/java/org/apache/solr/schema/TrieIntField.java
index 51fb2eb..8acb66d 100644
--- a/solr/core/src/java/org/apache/solr/schema/TrieIntField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieIntField.java
@@ -55,9 +55,9 @@ public class TrieIntField extends TrieField implements IntValueFieldType {
if(val==null) return null;
if (val instanceof Number) return ((Number) val).intValue();
try {
- if (val instanceof String) return Integer.parseInt((String) val);
+ if (val instanceof CharSequence) return Integer.parseInt(val.toString());
} catch (NumberFormatException e) {
- Float v = Float.parseFloat((String) val);
+ Float v = Float.parseFloat(val.toString());
return v.intValue();
}
return super.toNativeType(val);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/core/src/java/org/apache/solr/schema/TrieLongField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/TrieLongField.java b/solr/core/src/java/org/apache/solr/schema/TrieLongField.java
index 02deade..bf6d393 100644
--- a/solr/core/src/java/org/apache/solr/schema/TrieLongField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieLongField.java
@@ -55,9 +55,9 @@ public class TrieLongField extends TrieField implements LongValueFieldType {
if(val==null) return null;
if (val instanceof Number) return ((Number) val).longValue();
try {
- if (val instanceof String) return Long.parseLong((String) val);
+ if (val instanceof CharSequence) return Long.parseLong(val.toString());
} catch (NumberFormatException e) {
- Double v = Double.parseDouble((String) val);
+ Double v = Double.parseDouble(val.toString());
return v.longValue();
}
return super.toNativeType(val);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
index 8fc5541..6aef1b4 100644
--- a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
+++ b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
@@ -29,6 +29,7 @@ import org.apache.solr.common.SolrDocumentBase;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
@@ -165,6 +166,7 @@ public class DocumentBuilder {
if( v == null ) {
continue;
}
+ v = ByteArrayUtf8CharSequence.convertCharSeq(v);
hasField = true;
if (sfield != null) {
used = true;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
index dde6dba..59072c5 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
@@ -23,12 +23,14 @@ import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
@@ -38,6 +40,8 @@ import org.apache.solr.common.util.NamedList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.solr.common.util.ByteArrayUtf8CharSequence.convertCharSeq;
+
/**
* Provides methods for marshalling an UpdateRequest to a NamedList which can be serialized in the javabin format and
* vice versa.
@@ -111,87 +115,7 @@ public class JavaBinUpdateRequestCodec {
Map<String,Map<String,Object>> delByIdMap;
List<String> delByQ;
final NamedList[] namedList = new NamedList[1];
- try (JavaBinCodec codec = new JavaBinCodec() {
-
- // NOTE: this only works because this is an anonymous inner class
- // which will only ever be used on a single stream -- if this class
- // is ever refactored, this will not work.
- private boolean seenOuterMostDocIterator = false;
-
- @Override
- public NamedList readNamedList(DataInputInputStream dis) throws IOException {
- int sz = readSize(dis);
- NamedList nl = new NamedList();
- if (namedList[0] == null) {
- namedList[0] = nl;
- }
- for (int i = 0; i < sz; i++) {
- String name = (String) readVal(dis);
- Object val = readVal(dis);
- nl.add(name, val);
- }
- return nl;
- }
-
- @Override
- public List readIterator(DataInputInputStream fis) throws IOException {
- // default behavior for reading any regular Iterator in the stream
- if (seenOuterMostDocIterator) return super.readIterator(fis);
-
- // special treatment for first outermost Iterator
- // (the list of documents)
- seenOuterMostDocIterator = true;
- return readOuterMostDocIterator(fis);
- }
-
- private List readOuterMostDocIterator(DataInputInputStream fis) throws IOException {
- NamedList params = (NamedList) namedList[0].get("params");
- updateRequest.setParams(new ModifiableSolrParams(params.toSolrParams()));
- if (handler == null) return super.readIterator(fis);
- Integer commitWithin = null;
- Boolean overwrite = null;
- Object o = null;
- while (true) {
- if (o == null) {
- o = readVal(fis);
- }
-
- if (o == END_OBJ) {
- break;
- }
-
- SolrInputDocument sdoc = null;
- if (o instanceof List) {
- sdoc = listToSolrInputDocument((List<NamedList>) o);
- } else if (o instanceof NamedList) {
- UpdateRequest req = new UpdateRequest();
- req.setParams(new ModifiableSolrParams(((NamedList) o).toSolrParams()));
- handler.update(null, req, null, null);
- } else if (o instanceof Map.Entry){
- sdoc = (SolrInputDocument) ((Map.Entry) o).getKey();
- Map p = (Map) ((Map.Entry) o).getValue();
- if (p != null) {
- commitWithin = (Integer) p.get(UpdateRequest.COMMIT_WITHIN);
- overwrite = (Boolean) p.get(UpdateRequest.OVERWRITE);
- }
- } else {
- sdoc = (SolrInputDocument) o;
- }
-
- // peek at the next object to see if we're at the end
- o = readVal(fis);
- if (o == END_OBJ) {
- // indicate that we've hit the last doc in the batch, used to enable optimizations when doing replication
- updateRequest.lastDocInBatch();
- }
-
- handler.update(sdoc, updateRequest, commitWithin, overwrite);
- }
- return Collections.EMPTY_LIST;
- }
-
- };) {
-
+ try (JavaBinCodec codec = new StreamingCodec(namedList, updateRequest, handler)) {
codec.unmarshal(is);
}
@@ -248,43 +172,169 @@ public class JavaBinUpdateRequestCodec {
return updateRequest;
}
- private SolrInputDocument listToSolrInputDocument(List<NamedList> namedList) {
- SolrInputDocument doc = new SolrInputDocument();
- for (int i = 0; i < namedList.size(); i++) {
- NamedList nl = namedList.get(i);
- if (i == 0) {
- Float boost = (Float) nl.getVal(0);
- if (boost != null && boost.floatValue() != 1f) {
- String message = "Ignoring document boost: " + boost + " as index-time boosts are not supported anymore";
- if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
- log.warn(message);
- } else {
- log.debug(message);
+
+ private NamedList solrParamsToNamedList(SolrParams params) {
+ if (params == null) return new NamedList();
+ return params.toNamedList();
+ }
+
+ public interface StreamingUpdateHandler {
+ void update(SolrInputDocument document, UpdateRequest req, Integer commitWithin, Boolean override);
+ }
+
+ static class MaskCharSequenceSolrInputDoc extends SolrInputDocument {
+ public MaskCharSequenceSolrInputDoc(Map<String, SolrInputField> fields) {
+ super(fields);
+ }
+
+ @Override
+ public Object getFieldValue(String name) {
+ return convertCharSeq(super.getFieldValue(name));
+ }
+
+ }
+
+ class StreamingCodec extends JavaBinCodec {
+
+ private final NamedList[] namedList;
+ private final UpdateRequest updateRequest;
+ private final StreamingUpdateHandler handler;
+ // NOTE: this only works because each StreamingCodec instance
+ // is only ever used on a single stream -- if an instance is ever
+ // reused for another stream, this will not work.
+ private boolean seenOuterMostDocIterator;
+
+ public StreamingCodec(NamedList[] namedList, UpdateRequest updateRequest, StreamingUpdateHandler handler) {
+ this.namedList = namedList;
+ this.updateRequest = updateRequest;
+ this.handler = handler;
+ seenOuterMostDocIterator = false;
+ }
+
+ @Override
+ protected SolrInputDocument createSolrInputDocument(int sz) {
+ return new MaskCharSequenceSolrInputDoc(new LinkedHashMap<>(sz));
+ }
+
+ @Override
+ public NamedList readNamedList(DataInputInputStream dis) throws IOException {
+ int sz = readSize(dis);
+ NamedList nl = new NamedList();
+ if (namedList[0] == null) {
+ namedList[0] = nl;
+ }
+ for (int i = 0; i < sz; i++) {
+ String name = (String) readVal(dis);
+ Object val = readVal(dis);
+ nl.add(name, val);
+ }
+ return nl;
+ }
+
+ private SolrInputDocument listToSolrInputDocument(List<NamedList> namedList) {
+ SolrInputDocument doc = new SolrInputDocument();
+ for (int i = 0; i < namedList.size(); i++) {
+ NamedList nl = namedList.get(i);
+ if (i == 0) {
+ Float boost = (Float) nl.getVal(0);
+ if (boost != null && boost.floatValue() != 1f) {
+ String message = "Ignoring document boost: " + boost + " as index-time boosts are not supported anymore";
+ if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
+ log.warn(message);
+ } else {
+ log.debug(message);
+ }
}
+ } else {
+ Float boost = (Float) nl.getVal(2);
+ if (boost != null && boost.floatValue() != 1f) {
+ String message = "Ignoring field boost: " + boost + " as index-time boosts are not supported anymore";
+ if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
+ log.warn(message);
+ } else {
+ log.debug(message);
+ }
+ }
+ doc.addField((String) nl.getVal(0),
+ nl.getVal(1));
}
- } else {
- Float boost = (Float) nl.getVal(2);
- if (boost != null && boost.floatValue() != 1f) {
- String message = "Ignoring field boost: " + boost + " as index-time boosts are not supported anymore";
- if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
- log.warn(message);
+ }
+ return doc;
+ }
+
+ @Override
+ public List readIterator(DataInputInputStream fis) throws IOException {
+ // default behavior for reading any regular Iterator in the stream
+ if (seenOuterMostDocIterator) return super.readIterator(fis);
+
+ // special treatment for first outermost Iterator
+ // (the list of documents)
+ seenOuterMostDocIterator = true;
+ return readOuterMostDocIterator(fis);
+ }
+
+
+ /* @Override
+ protected Object readDocumentFieldVal(String fieldName, DataInputInputStream dis) throws IOException {
+ super.readStringAsCharSeq = utf8FieldPredicate != null && utf8FieldPredicate.test(fieldName);
+ try {
+ return super.readDocumentFieldVal(fieldName, dis);
+ } finally {
+ super.readStringAsCharSeq = false;
+ }
+ }*/
+
+ private List readOuterMostDocIterator(DataInputInputStream fis) throws IOException {
+ NamedList params = (NamedList) namedList[0].get("params");
+ updateRequest.setParams(new ModifiableSolrParams(params.toSolrParams()));
+ if (handler == null) return super.readIterator(fis);
+ Integer commitWithin = null;
+ Boolean overwrite = null;
+ Object o = null;
+ super.readStringAsCharSeq = true;
+ try {
+ while (true) {
+ if (o == null) {
+ o = readVal(fis);
+ }
+
+ if (o == END_OBJ) {
+ break;
+ }
+
+ SolrInputDocument sdoc = null;
+ if (o instanceof List) {
+ sdoc = listToSolrInputDocument((List<NamedList>) o);
+ } else if (o instanceof NamedList) {
+ UpdateRequest req = new UpdateRequest();
+ req.setParams(new ModifiableSolrParams(((NamedList) o).toSolrParams()));
+ handler.update(null, req, null, null);
+ } else if (o instanceof Map.Entry) {
+ sdoc = (SolrInputDocument) ((Entry) o).getKey();
+ Map p = (Map) ((Entry) o).getValue();
+ if (p != null) {
+ commitWithin = (Integer) p.get(UpdateRequest.COMMIT_WITHIN);
+ overwrite = (Boolean) p.get(UpdateRequest.OVERWRITE);
+ }
} else {
- log.debug(message);
+ sdoc = (SolrInputDocument) o;
+ }
+
+ // peek at the next object to see if we're at the end
+ o = readVal(fis);
+ if (o == END_OBJ) {
+ // indicate that we've hit the last doc in the batch, used to enable optimizations when doing replication
+ updateRequest.lastDocInBatch();
}
+
+ handler.update(sdoc, updateRequest, commitWithin, overwrite);
}
- doc.addField((String) nl.getVal(0),
- nl.getVal(1));
+ return Collections.EMPTY_LIST;
+ } finally {
+ super.readStringAsCharSeq = false;
+
}
}
- return doc;
- }
-
- private NamedList solrParamsToNamedList(SolrParams params) {
- if (params == null) return new NamedList();
- return params.toNamedList();
- }
- public static interface StreamingUpdateHandler {
- public void update(SolrInputDocument document, UpdateRequest req, Integer commitWithin, Boolean override);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/solrj/src/java/org/apache/solr/common/util/ByteArrayUtf8CharSequence.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ByteArrayUtf8CharSequence.java b/solr/solrj/src/java/org/apache/solr/common/util/ByteArrayUtf8CharSequence.java
index 4ac48f2..c9a05cb 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/ByteArrayUtf8CharSequence.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/ByteArrayUtf8CharSequence.java
@@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import java.util.function.Function;
import org.noggit.CharArr;
@@ -38,6 +39,7 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
protected int hashCode = Integer.MIN_VALUE;
protected int length;
protected volatile String utf16;
+ public Function<ByteArrayUtf8CharSequence, String> stringProvider;
public ByteArrayUtf8CharSequence(String utf16) {
buf = new byte[Math.multiplyExact(utf16.length(), 3)];
@@ -51,21 +53,39 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
assert isValid();
}
+ public byte[] getBuf() {
+ return buf;
+ }
+
+ public int offset() {
+ return offset;
+ }
+
public ByteArrayUtf8CharSequence(byte[] buf, int offset, int length) {
this.buf = buf;
this.offset = offset;
this.length = length;
}
+ @Override
+ public byte byteAt(int idx) {
+ if (idx >= length || idx < 0) throw new ArrayIndexOutOfBoundsException("idx must be >=0 and < " + length);
+ return buf[offset + idx];
+ }
+
public String getStringOrNull() {
return utf16;
}
+
@Override
public int write(int start, byte[] buffer, int pos) {
- if (start == -1 || start >= length) return -1;
- if (length == 0) return 0;
- int writableBytes = Math.min(length - start, buffer.length - pos);
- System.arraycopy(buf, offset + start, buffer, pos, writableBytes);
+ return _writeBytes(buf, offset, length, start, buffer, pos);
+ }
+
+ static int _writeBytes(byte[] src, int srcOffset, int srcLength, int start, byte[] buffer, int pos) {
+ if (srcOffset == -1 || start >= srcLength) return -1;
+ int writableBytes = Math.min(srcLength - start, buffer.length - pos);
+ System.arraycopy(src, srcOffset + start, buffer, pos, writableBytes);
return writableBytes;
}
@@ -97,15 +117,26 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
@Override
public boolean equals(Object other) {
- if (other == null) {
+ if (other instanceof Utf8CharSequence) {
+ if (size() != ((Utf8CharSequence) other).size()) return false;
+ if (other instanceof ByteArrayUtf8CharSequence) {
+ if (this.length != ((ByteArrayUtf8CharSequence) other).length) return false;
+ ByteArrayUtf8CharSequence that = (ByteArrayUtf8CharSequence) other;
+ return _equals(this.buf, this.offset, this.offset + this.length,
+ that.buf, that.offset, that.offset + that.length);
+ }
+ return utf8Equals(this, (Utf8CharSequence) other);
+ } else {
return false;
}
- if (other instanceof ByteArrayUtf8CharSequence) {
- ByteArrayUtf8CharSequence that = (ByteArrayUtf8CharSequence) other;
- return _equals(this.buf, this.offset, this.offset + this.length,
- that.buf, that.offset, that.offset + that.length);
+ }
+
+ public static boolean utf8Equals(Utf8CharSequence utf8_1, Utf8CharSequence utf8_2) {
+ if (utf8_1.size() != utf8_2.size()) return false;
+ for (int i = 0; i < utf8_1.size(); i++) {
+ if (utf8_1.byteAt(i) != utf8_2.byteAt(i)) return false;
}
- return false;
+ return true;
}
@@ -115,14 +146,16 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
}
private String _getStr() {
+ String utf16 = this.utf16;
if (utf16 == null) {
- synchronized (this) {
- if (utf16 == null) {
- CharArr arr = new CharArr();
- ByteUtils.UTF8toUTF16(buf, offset, length, arr);
- utf16 = arr.toString();
- }
+ if (stringProvider != null) {
+ this.utf16 = utf16 = stringProvider.apply(this);
+ } else {
+ CharArr arr = new CharArr();
+ ByteUtils.UTF8toUTF16(buf, offset, length, arr);
+ this.utf16 = utf16 = arr.toString();
}
+
}
return utf16;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/solrj/src/java/org/apache/solr/common/util/BytesBlock.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/BytesBlock.java b/solr/solrj/src/java/org/apache/solr/common/util/BytesBlock.java
new file mode 100644
index 0000000..4ce4ecc
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/common/util/BytesBlock.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.common.util;
+
+public class BytesBlock {
+ private int bufSize;
+ public byte[] buf;
+ //current position
+ private int pos;
+ //going to expand. mark the start position
+ private int startPos = 0;
+
+ public BytesBlock(int sz) {
+ this.bufSize = sz;
+ create();
+ }
+
+ public int getPos() {
+ return pos;
+ }
+
+ public int getStartPos() {
+ return startPos;
+ }
+
+ public byte[] getBuf() {
+ return buf;
+ }
+
+ public BytesBlock expand(int sz) {
+ if (bufSize - pos >= sz) {
+ return markPositions(sz);
+ }
+ if (sz > (bufSize / 4)) return new BytesBlock(sz).expand(sz);// a reasonably large block, create new
+ create();
+ return markPositions(sz);
+ }
+
+ private BytesBlock markPositions(int sz) {
+ this.startPos = pos;
+ pos += sz;
+ return this;
+ }
+
+
+ private void create() {
+ buf = new byte[bufSize];
+ startPos = pos = 0;
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
index 586adfb..782d109 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
@@ -36,6 +36,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiConsumer;
+import java.util.function.Function;
import org.apache.solr.common.ConditionalMapWriter;
import org.apache.solr.common.EnumFieldValue;
@@ -117,7 +118,7 @@ public class JavaBinCodec implements PushWriter {
private WritableDocFields writableDocFields;
private boolean alreadyMarshalled;
private boolean alreadyUnmarshalled;
- private boolean readStringAsCharSeq = false;
+ protected boolean readStringAsCharSeq = false;
public JavaBinCodec() {
resolver =null;
@@ -284,7 +285,7 @@ public class JavaBinCodec implements PushWriter {
// OK, try type + size in single byte
switch (tagByte >>> 5) {
case STR >>> 5:
- return readStr(dis);
+ return readStr(dis, stringCache, readStringAsCharSeq);
case SINT >>> 5:
return readSmallInt(dis);
case SLONG >>> 5:
@@ -355,6 +356,9 @@ public class JavaBinCodec implements PushWriter {
writeSolrDocumentList((SolrDocumentList) val);
return true;
}
+ if (val instanceof SolrInputField) {
+ return writeKnownType(((SolrInputField) val).getValue());
+ }
if (val instanceof IteratorWriter) {
writeIterator((IteratorWriter) val);
return true;
@@ -612,7 +616,7 @@ public class JavaBinCodec implements PushWriter {
log.debug(message);
}
}
- SolrInputDocument sdoc = new SolrInputDocument(new LinkedHashMap<>(sz));
+ SolrInputDocument sdoc = createSolrInputDocument(sz);
for (int i = 0; i < sz; i++) {
String fieldName;
Object obj = readVal(dis); // could be a boost, a field name, or a child document
@@ -639,15 +643,16 @@ public class JavaBinCodec implements PushWriter {
return sdoc;
}
+ protected SolrInputDocument createSolrInputDocument(int sz) {
+ return new SolrInputDocument(new LinkedHashMap<>(sz));
+ }
+
public void writeSolrInputDocument(SolrInputDocument sdoc) throws IOException {
List<SolrInputDocument> children = sdoc.getChildDocuments();
int sz = sdoc.size() + (children==null ? 0 : children.size());
writeTag(SOLRINPUTDOC, sz);
writeFloat(1f); // document boost
- for (SolrInputField inputField : sdoc.values()) {
- writeExternString(inputField.getName());
- writeVal(inputField.getValue());
- }
+ sdoc.writeMap(ew);
if (children != null) {
for (SolrInputDocument child : children) {
writeSolrInputDocument(child);
@@ -891,26 +896,53 @@ public class JavaBinCodec implements PushWriter {
private StringBytes bytesRef = new StringBytes(bytes,0,0);
public CharSequence readStr(DataInputInputStream dis) throws IOException {
- return readStr(dis,null);
+ return readStr(dis, null, readStringAsCharSeq);
}
- public CharSequence readStr(DataInputInputStream dis, StringCache stringCache) throws IOException {
+ public CharSequence readStr(DataInputInputStream dis, StringCache stringCache, boolean readStringAsCharSeq) throws IOException {
+ if (readStringAsCharSeq) {
+ return readUtf8(dis);
+ }
int sz = readSize(dis);
+ return _readStr(dis, stringCache, sz);
+ }
+
+ private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache, int sz) throws IOException {
if (bytes == null || bytes.length < sz) bytes = new byte[sz];
dis.readFully(bytes, 0, sz);
if (stringCache != null) {
return stringCache.get(bytesRef.reset(bytes, 0, sz));
} else {
arr.reset();
- if (readStringAsCharSeq) {
- byte[] copyBuf = new byte[sz];
- System.arraycopy(bytes, 0, copyBuf, 0, sz);
- return new ByteArrayUtf8CharSequence(copyBuf, 0, sz);
- } else {
- ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
- return arr.toString();
- }
+ ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
+ return arr.toString();
+ }
+ }
+
+ /////////// code to optimize reading UTF8
+ static final int MAX_UTF8_SZ = 1024 * 64;//too big strings can cause too much memory allocation
+ private Function<ByteArrayUtf8CharSequence, String> stringProvider;
+ private BytesBlock bytesBlock;
+
+ protected CharSequence readUtf8(DataInputInputStream dis) throws IOException {
+ int sz = readSize(dis);
+ if (sz > MAX_UTF8_SZ) return _readStr(dis, null, sz);
+ if (bytesBlock == null) bytesBlock = new BytesBlock(1024 * 4);
+ BytesBlock block = this.bytesBlock.expand(sz);
+ dis.readFully(block.getBuf(), block.getStartPos(), sz);
+
+ ByteArrayUtf8CharSequence result = new ByteArrayUtf8CharSequence(block.getBuf(), block.getStartPos(), sz);
+ if (stringProvider == null) {
+ stringProvider = butf8cs -> {
+ synchronized (JavaBinCodec.this) {
+ arr.reset();
+ ByteUtils.UTF8toUTF16(butf8cs.buf, butf8cs.offset(), butf8cs.size(), arr);
+ return arr.toString();
+ }
+ };
}
+ result.stringProvider = this.stringProvider;
+ return result;
}
public void writeInt(int val) throws IOException {
@@ -973,6 +1005,7 @@ public class JavaBinCodec implements PushWriter {
return true;
} else if (val instanceof Utf8CharSequence) {
writeUTF8Str((Utf8CharSequence) val);
+ return true;
} else if (val instanceof CharSequence) {
writeStr((CharSequence) val);
return true;
@@ -1133,7 +1166,7 @@ public class JavaBinCodec implements PushWriter {
return stringsList.get(idx - 1);
} else {// idx == 0 means it has a string value
tagByte = fis.readByte();
- CharSequence s = readStr(fis, stringCache);
+ CharSequence s = readStr(fis, stringCache, false);
if (s != null) s = s.toString();
if (stringsList == null) stringsList = new ArrayList<>();
stringsList.add(s);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/solrj/src/java/org/apache/solr/common/util/Utf8CharSequence.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/Utf8CharSequence.java b/solr/solrj/src/java/org/apache/solr/common/util/Utf8CharSequence.java
index bd056f5..1533e5c 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/Utf8CharSequence.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/Utf8CharSequence.java
@@ -20,10 +20,10 @@ package org.apache.solr.common.util;
import java.io.IOException;
import java.io.OutputStream;
-/**A byte[] backed String
- *
+/**
+ * A byte[] backed String
*/
-public interface Utf8CharSequence extends CharSequence {
+public interface Utf8CharSequence extends CharSequence , Comparable {
/**
* Write the bytes into a buffer. The objective is to avoid the local bytes being exposed to
@@ -31,20 +31,31 @@ public interface Utf8CharSequence extends CharSequence {
* possible into the buffer and then return how many bytes were written. It's the responsibility
* of the caller to call this method repeatedly and ensure that everything is completely written
*
- * @param start position from which to start writing
+ * @param start position from which to start writing
* @param buffer the buffer to which to write to
* @param pos position to start writing
* @return no:of bytes written
*/
int write(int start, byte[] buffer, int pos);
- /** The size of utf8 bytes
+ /**
+ * The size of utf8 bytes
+ *
* @return the size
*/
int size();
+ byte byteAt(int idx);
+
+ @Override
+ default int compareTo(Object o) {
+ if(o == null) return 1;
+ return toString().compareTo(o.toString());
+ }
+
/**
* Creates a byte[] and copy to it first before writing it out to the output
+ *
* @param os The sink
*/
default void write(OutputStream os) throws IOException {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/507a96e4/solr/solrj/src/test/org/apache/solr/common/util/Utf8CharSequenceTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/common/util/Utf8CharSequenceTest.java b/solr/solrj/src/test/org/apache/solr/common/util/Utf8CharSequenceTest.java
index bff362c..bf3fd26 100644
--- a/solr/solrj/src/test/org/apache/solr/common/util/Utf8CharSequenceTest.java
+++ b/solr/solrj/src/test/org/apache/solr/common/util/Utf8CharSequenceTest.java
@@ -58,4 +58,45 @@ public class Utf8CharSequenceTest extends SolrTestCaseJ4 {
utf81 = (ByteArrayUtf8CharSequence) m1.get("str");
assertTrue(utf81.equals(utf8));
}
+
+ public void testUnMarshal() throws IOException {
+ NamedList nl = new NamedList();
+ String str = " The value!";
+ for (int i = 0; i < 5; i++) {
+ StringBuffer sb = new StringBuffer();
+ sb.append(i);
+ for (int j = 0; j < i; j++) {
+ sb.append(str);
+ }
+ nl.add("key" + i, sb.toString());
+ }
+ StringBuffer sb = new StringBuffer();
+ for (; ; ) {
+ sb.append(str);
+ if (sb.length() > 1024 * 4) break;
+ }
+ nl.add("key_long", sb.toString());
+ nl.add("key5", "5" + str);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ new JavaBinCodec().marshal(nl, baos);
+ byte[] bytes = baos.toByteArray();
+
+ NamedList nl1 = (NamedList) new JavaBinCodec()
+ .setReadStringAsCharSeq(true)
+ .unmarshal(new FastInputStream(null, bytes, 0, bytes.length));
+ byte[] buf = ((ByteArrayUtf8CharSequence) nl1.getVal(0)).getBuf();
+ ByteArrayUtf8CharSequence valLong = (ByteArrayUtf8CharSequence) nl1.get("key_long");
+ assertFalse(valLong.getBuf() == buf);
+
+ for (int i = 1; i < 6; i++) {
+ ByteArrayUtf8CharSequence val = (ByteArrayUtf8CharSequence) nl1.get("key" + i);
+ assertEquals(buf, val.getBuf());
+ String s = val.toString();
+ assertTrue(s.startsWith("" + i));
+ assertTrue(s, s.endsWith(str));
+ }
+
+ }
+
+
}
[2/3] lucene-solr:branch_8x: SOLR-12983: tests don't need to use the
optimization
Posted by no...@apache.org.
SOLR-12983: tests don't need to use the optimization
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6f6a35d8
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6f6a35d8
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6f6a35d8
Branch: refs/heads/branch_8x
Commit: 6f6a35d8f7353856476d24dbfe404c4b171dafc2
Parents: 507a96e
Author: Noble Paul <no...@apache.org>
Authored: Tue Jan 8 17:25:26 2019 +1100
Committer: Noble Paul <no...@apache.org>
Committed: Wed Jan 9 13:49:25 2019 +1100
----------------------------------------------------------------------
.../solr/handler/loader/JavabinLoader.java | 4 +++-
.../solrj/request/JavaBinUpdateRequestCodec.java | 19 ++++++++-----------
2 files changed, 11 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6f6a35d8/solr/core/src/java/org/apache/solr/handler/loader/JavabinLoader.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/loader/JavabinLoader.java b/solr/core/src/java/org/apache/solr/handler/loader/JavabinLoader.java
index a4ac256..8bbbde2 100644
--- a/solr/core/src/java/org/apache/solr/handler/loader/JavabinLoader.java
+++ b/solr/core/src/java/org/apache/solr/handler/loader/JavabinLoader.java
@@ -118,7 +118,9 @@ public class JavabinLoader extends ContentStreamLoader {
for (; ; ) {
if (in.peek() == -1) return;
try {
- update = new JavaBinUpdateRequestCodec().unmarshal(in, handler);
+ update = new JavaBinUpdateRequestCodec()
+ .setReadStringAsCharSeq(true)
+ .unmarshal(in, handler);
} catch (EOFException e) {
break; // this is expected
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6f6a35d8/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
index 59072c5..05d955e 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
@@ -51,6 +51,13 @@ import static org.apache.solr.common.util.ByteArrayUtf8CharSequence.convertCharS
* @since solr 1.4
*/
public class JavaBinUpdateRequestCodec {
+ private boolean readStringAsCharSeq = false;
+
+ public JavaBinUpdateRequestCodec setReadStringAsCharSeq(boolean flag) {
+ this.readStringAsCharSeq = flag;
+ return this;
+
+ }
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final AtomicBoolean WARNED_ABOUT_INDEX_TIME_BOOSTS = new AtomicBoolean();
@@ -274,16 +281,6 @@ public class JavaBinUpdateRequestCodec {
}
- /* @Override
- protected Object readDocumentFieldVal(String fieldName, DataInputInputStream dis) throws IOException {
- super.readStringAsCharSeq = utf8FieldPredicate != null && utf8FieldPredicate.test(fieldName);
- try {
- return super.readDocumentFieldVal(fieldName, dis);
- } finally {
- super.readStringAsCharSeq = false;
- }
- }*/
-
private List readOuterMostDocIterator(DataInputInputStream fis) throws IOException {
NamedList params = (NamedList) namedList[0].get("params");
updateRequest.setParams(new ModifiableSolrParams(params.toSolrParams()));
@@ -291,7 +288,7 @@ public class JavaBinUpdateRequestCodec {
Integer commitWithin = null;
Boolean overwrite = null;
Object o = null;
- super.readStringAsCharSeq = true;
+ super.readStringAsCharSeq = JavaBinUpdateRequestCodec.this.readStringAsCharSeq;
try {
while (true) {
if (o == null) {
[3/3] lucene-solr:branch_8x: SOLR-12983: Create DocValues fields
directly from byte[]
Posted by no...@apache.org.
SOLR-12983: Create DocValues fields directly from byte[]
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0d4c81f2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0d4c81f2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0d4c81f2
Branch: refs/heads/branch_8x
Commit: 0d4c81f2f9d354514c323e2876eea71b901021ca
Parents: 6f6a35d
Author: Noble Paul <no...@apache.org>
Authored: Wed Jan 9 13:44:41 2019 +1100
Committer: Noble Paul <no...@apache.org>
Committed: Wed Jan 9 13:49:36 2019 +1100
----------------------------------------------------------------------
.../org/apache/solr/schema/SortableTextField.java | 14 +++++++++++++-
.../src/java/org/apache/solr/schema/StrField.java | 10 +++++++++-
.../java/org/apache/solr/update/DocumentBuilder.java | 1 -
3 files changed, 22 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d4c81f2/solr/core/src/java/org/apache/solr/schema/SortableTextField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/SortableTextField.java b/solr/core/src/java/org/apache/solr/schema/SortableTextField.java
index 1d2c21d..1d48c84 100644
--- a/solr/core/src/java/org/apache/solr/schema/SortableTextField.java
+++ b/solr/core/src/java/org/apache/solr/schema/SortableTextField.java
@@ -29,6 +29,7 @@ import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
@@ -103,6 +104,13 @@ public class SortableTextField extends TextField {
if (! field.hasDocValues()) {
return Collections.singletonList(f);
}
+ if (value instanceof ByteArrayUtf8CharSequence) {
+ ByteArrayUtf8CharSequence utf8 = (ByteArrayUtf8CharSequence) value;
+ if (utf8.size() < maxCharsForDocValues) {
+ BytesRef bytes = new BytesRef(utf8.getBuf(), utf8.offset(), utf8.size());
+ return getIndexableFields(field, f, bytes);
+ }
+ }
final String origString = value.toString();
final int origLegth = origString.length();
final boolean truncate = maxCharsForDocValues < origLegth;
@@ -116,7 +124,11 @@ public class SortableTextField extends TextField {
maxCharsForDocValues + " when useDocValuesAsStored=true (length=" + origLegth + ")");
}
final BytesRef bytes = new BytesRef(truncate ? origString.subSequence(0, maxCharsForDocValues) : origString);
-
+
+ return getIndexableFields(field, f, bytes);
+ }
+
+ private static List<IndexableField> getIndexableFields(SchemaField field, IndexableField f, BytesRef bytes) {
final IndexableField docval = field.multiValued()
? new SortedSetDocValuesField(field.getName(), bytes)
: new SortedDocValuesField(field.getName(), bytes);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d4c81f2/solr/core/src/java/org/apache/solr/schema/StrField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/StrField.java b/solr/core/src/java/org/apache/solr/schema/StrField.java
index a8ec62c..3413ce1 100644
--- a/solr/core/src/java/org/apache/solr/schema/StrField.java
+++ b/solr/core/src/java/org/apache/solr/schema/StrField.java
@@ -31,6 +31,7 @@ import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
@@ -48,7 +49,7 @@ public class StrField extends PrimitiveFieldType {
if (field.hasDocValues()) {
IndexableField docval;
- final BytesRef bytes = new BytesRef(value.toString());
+ final BytesRef bytes = getBytesRef(value);
if (field.multiValued()) {
docval = new SortedSetDocValuesField(field.getName(), bytes);
} else {
@@ -68,6 +69,13 @@ public class StrField extends PrimitiveFieldType {
return Collections.singletonList(fval);
}
+ public static BytesRef getBytesRef(Object value) {
+ if (value instanceof ByteArrayUtf8CharSequence) {
+ ByteArrayUtf8CharSequence utf8 = (ByteArrayUtf8CharSequence) value;
+ return new BytesRef(utf8.getBuf(), utf8.offset(), utf8.size());
+ } else return new BytesRef(value.toString());
+ }
+
@Override
public boolean isUtf8Field() {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d4c81f2/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
index 6aef1b4..38b10b9 100644
--- a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
+++ b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
@@ -166,7 +166,6 @@ public class DocumentBuilder {
if( v == null ) {
continue;
}
- v = ByteArrayUtf8CharSequence.convertCharSeq(v);
hasField = true;
if (sfield != null) {
used = true;