You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by dw...@apache.org on 2022/03/01 06:26:35 UTC
[solr] branch main updated: SOLR-16059: FieldType.DefaultAnalyzer: correct reader.read() loop and… (#712)
This is an automated email from the ASF dual-hosted git repository.
dweiss pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new bb85382 SOLR-16059: FieldType.DefaultAnalyzer: correct reader.read() loop and… (#712)
bb85382 is described below
commit bb85382c3bef10fda52e5a26bb8e2d15b01a662f
Author: Dawid Weiss <da...@carrotsearch.com>
AuthorDate: Tue Mar 1 07:25:25 2022 +0100
SOLR-16059: FieldType.DefaultAnalyzer: correct reader.read() loop and… (#712)
SOLR-16059: FieldType.DefaultAnalyzer: correct reader.read() loop and emit final offset properly.
---
.../src/java/org/apache/solr/schema/FieldType.java | 41 ++++++++++++++++++++--
.../apache/solr/schema/StrFieldAnalyzerTest.java | 36 +++++++++++++++++++
2 files changed, 74 insertions(+), 3 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java
index bf4461a..5154562 100644
--- a/solr/core/src/java/org/apache/solr/schema/FieldType.java
+++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java
@@ -502,15 +502,36 @@ public abstract class FieldType extends FieldProperties {
@Override
public TokenStreamComponents createComponents(String fieldName) {
Tokenizer ts = new Tokenizer() {
+ private boolean done = false;
+ private int finalOffset;
+
final char[] cbuf = new char[maxChars];
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
final BytesTermAttribute bytesAtt = isPointField() ? addAttribute(BytesTermAttribute.class) : null;
final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
@Override
public boolean incrementToken() throws IOException {
+ if (done) {
+ return false;
+ }
+
clearAttributes();
- int n = input.read(cbuf,0,maxChars);
- if (n<=0) return false;
+ done = true;
+
+ int max = maxChars;
+ int n = 0;
+ while (max > 0) {
+ int len = input.read(cbuf, n, max);
+ if (len < 0) break;
+ max -= len;
+ n += len;
+ }
+
+ if (n == 0) {
+ return false;
+ }
+
if (isPointField()) {
BytesRef b = ((PointField)FieldType.this).toInternalByteRef(new String(cbuf, 0, n));
bytesAtt.setBytesRef(b);
@@ -518,9 +539,23 @@ public abstract class FieldType extends FieldProperties {
String s = toInternal(new String(cbuf, 0, n));
termAtt.setEmpty().append(s);
}
- offsetAtt.setOffset(correctOffset(0),correctOffset(n));
+
+ finalOffset = correctOffset(n);
+ offsetAtt.setOffset(correctOffset(0), finalOffset);
return true;
}
+
+ @Override
+ public void end() throws IOException {
+ super.end();
+ offsetAtt.setOffset(finalOffset, finalOffset);
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ this.done = false;
+ }
};
return new TokenStreamComponents(ts);
diff --git a/solr/core/src/test/org/apache/solr/schema/StrFieldAnalyzerTest.java b/solr/core/src/test/org/apache/solr/schema/StrFieldAnalyzerTest.java
new file mode 100644
index 0000000..b893cfb
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/schema/StrFieldAnalyzerTest.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+
+import java.io.IOException;
+
+public class StrFieldAnalyzerTest extends BaseTokenStreamTestCase {
+ public void testOffsetSanity() throws IOException {
+ Analyzer analyzer = new StrField().getIndexAnalyzer();
+ assertTokenStreamContents(
+ analyzer.tokenStream("fieldName", "abc"),
+ new String[] {"abc"},
+ new int[] {0},
+ new int[] {3},
+ new int[] {1},
+ new int[] {1},
+ 3);
+ }
+}