You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@solr.apache.org by dw...@apache.org on 2022/03/01 06:26:35 UTC

[solr] branch main updated: SOLR-16059: FieldType.DefaultAnalyzer: correct reader.read() loop and… (#712)

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new bb85382  SOLR-16059: FieldType.DefaultAnalyzer: correct reader.read() loop and… (#712)
bb85382 is described below

commit bb85382c3bef10fda52e5a26bb8e2d15b01a662f
Author: Dawid Weiss <da...@carrotsearch.com>
AuthorDate: Tue Mar 1 07:25:25 2022 +0100

    SOLR-16059: FieldType.DefaultAnalyzer: correct reader.read() loop and… (#712)
    
    SOLR-16059: FieldType.DefaultAnalyzer: correct reader.read() loop and emit final offset properly.
---
 .../src/java/org/apache/solr/schema/FieldType.java | 41 ++++++++++++++++++++--
 .../apache/solr/schema/StrFieldAnalyzerTest.java   | 36 +++++++++++++++++++
 2 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java
index bf4461a..5154562 100644
--- a/solr/core/src/java/org/apache/solr/schema/FieldType.java
+++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java
@@ -502,15 +502,36 @@ public abstract class FieldType extends FieldProperties {
     @Override
     public TokenStreamComponents createComponents(String fieldName) {
       Tokenizer ts = new Tokenizer() {
+        private boolean done = false;
+        private int finalOffset;
+
         final char[] cbuf = new char[maxChars];
         final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
         final BytesTermAttribute bytesAtt = isPointField() ? addAttribute(BytesTermAttribute.class) : null;
         final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
         @Override
         public boolean incrementToken() throws IOException {
+          if (done) {
+            return false;
+          }
+
           clearAttributes();
-          int n = input.read(cbuf,0,maxChars);
-          if (n<=0) return false;
+          done = true;
+
+          int max = maxChars;
+          int n = 0;
+          while (max > 0) {
+            int len = input.read(cbuf, n, max);
+            if (len < 0) break;
+            max -= len;
+            n += len;
+          }
+
+          if (n == 0) {
+            return false;
+          }
+
           if (isPointField()) {
             BytesRef b = ((PointField)FieldType.this).toInternalByteRef(new String(cbuf, 0, n));
             bytesAtt.setBytesRef(b);
@@ -518,9 +539,23 @@ public abstract class FieldType extends FieldProperties {
             String s = toInternal(new String(cbuf, 0, n));
             termAtt.setEmpty().append(s);
           }
-          offsetAtt.setOffset(correctOffset(0),correctOffset(n));
+
+          finalOffset = correctOffset(n);
+          offsetAtt.setOffset(correctOffset(0), finalOffset);
           return true;
         }
+
+        @Override
+        public void end() throws IOException {
+          super.end();
+          offsetAtt.setOffset(finalOffset, finalOffset);
+        }
+
+        @Override
+        public void reset() throws IOException {
+          super.reset();
+          this.done = false;
+        }
       };
 
       return new TokenStreamComponents(ts);
diff --git a/solr/core/src/test/org/apache/solr/schema/StrFieldAnalyzerTest.java b/solr/core/src/test/org/apache/solr/schema/StrFieldAnalyzerTest.java
new file mode 100644
index 0000000..b893cfb
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/schema/StrFieldAnalyzerTest.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+
+import java.io.IOException;
+
+public class StrFieldAnalyzerTest extends BaseTokenStreamTestCase {
+  public void testOffsetSanity() throws IOException {
+    Analyzer analyzer = new StrField().getIndexAnalyzer();
+    assertTokenStreamContents(
+        analyzer.tokenStream("fieldName", "abc"),
+        new String[] {"abc"},
+        new int[] {0},
+        new int[] {3},
+        new int[] {1},
+        new int[] {1},
+        3);
+  }
+}