You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@lucene.apache.org by GitBox <gi...@apache.org> on 2021/01/26 03:40:58 UTC

[GitHub] [lucene-solr] muse-dev[bot] commented on a change in pull request #2091: Jira/solr 14778

muse-dev[bot] commented on a change in pull request #2091:
URL: https://github.com/apache/lucene-solr/pull/2091#discussion_r564197768



##########
File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TestDelimitedPayloadTokenFilter.java
##########
@@ -94,7 +98,8 @@ public void testFloatEncoding() throws Exception {

Review comment:
       *NULLPTR_DEREFERENCE:*  call to `TestDelimitedPayloadTokenFilter.assertTermEquals(...)` eventually accesses memory that is the null pointer on line 84.

##########
File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TestDelimitedPayloadTokenFilter.java
##########
@@ -120,26 +125,37 @@ void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) thr

Review comment:
       *NULLPTR_DEREFERENCE:*  call to `TestDelimitedPayloadTokenFilter.assertTermEquals(...)` eventually accesses memory that is the null pointer on line 106.

##########
File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TestDelimitedPayloadTokenFilter.java
##########
@@ -51,9 +53,11 @@ public void testPayloads() throws Exception {
   public void testNext() throws Exception {
 
     String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
-    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
-      (whitespaceMockTokenizer(test), 
-       DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
+    DelimitedPayloadTokenFilter filter =
+        new DelimitedPayloadTokenFilter(
+            whitespaceMockTokenizer(test),
+            DelimitedPayloadTokenFilter.DEFAULT_DELIMITER,
+            new IdentityEncoder());
     filter.reset();
     assertTermEquals("The", filter, null);

Review comment:
       *NULLPTR_DEREFERENCE:*  call to `TestDelimitedPayloadTokenFilter.assertTermEquals(...)` eventually accesses memory that is the null pointer on line 62.

##########
File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TestDelimitedPayloadTokenFilter.java
##########
@@ -51,9 +53,11 @@ public void testPayloads() throws Exception {

Review comment:
       *NULLPTR_DEREFERENCE:*  call to `TestDelimitedPayloadTokenFilter.assertTermEquals(...)` eventually accesses memory that is the null pointer on line 38.

##########
File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleAnalyzerWrapper.java
##########
@@ -0,0 +1,505 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.shingle;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+
+/** A test class for ShingleAnalyzerWrapper as regards queries and scoring. */
+public class TestShingleAnalyzerWrapper extends BaseTokenStreamTestCase {
+  private Analyzer analyzer;
+  private IndexSearcher searcher;
+  private IndexReader reader;
+  private Directory directory;
+
+  /**
+   * Set up a new index in RAM with three test phrases and the supplied Analyzer.
+   *
+   * @throws Exception if an error occurs with index writer or searcher
+   */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
+
+    Document doc;
+    doc = new Document();
+    doc.add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "just another test sentence", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "a sentence which contains no test", Field.Store.YES));
+    writer.addDocument(doc);
+
+    writer.close();
+
+    reader = DirectoryReader.open(directory);
+    searcher = newSearcher(reader);
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    reader.close();
+    directory.close();
+    analyzer.close();
+    super.tearDown();
+  }
+
+  protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
+    assertEquals(ranks.length, hits.length);
+    for (int i = 0; i < ranks.length; i++) {
+      assertEquals(ranks[i], hits[i].doc);
+    }
+  }
+
+  /*
+   * This shows how to construct a phrase query containing shingles.
+   */
+  public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
+    PhraseQuery.Builder builder = new PhraseQuery.Builder();
+    try (TokenStream ts = analyzer.tokenStream("content", "this sentence")) {
+      int j = -1;
+
+      PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        j += posIncrAtt.getPositionIncrement();
+        String termText = termAtt.toString();
+        builder.add(new Term("content", termText), j);
+      }
+      ts.end();
+    }
+
+    PhraseQuery q = builder.build();
+    ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
+    int[] ranks = new int[] {0};
+    compareRanks(hits, ranks);
+  }
+
+  /*
+   * How to construct a boolean query with shingles. A query like this will
+   * implicitly score those documents higher that contain the words in the query
+   * in the right order and adjacent to each other.
+   */
+  public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
+    BooleanQuery.Builder q = new BooleanQuery.Builder();
+
+    try (TokenStream ts = analyzer.tokenStream("content", "test sentence")) {
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        String termText = termAtt.toString();
+        q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD);
+      }
+      ts.end();
+    }
+
+    ScoreDoc[] hits = searcher.search(q.build(), 1000).scoreDocs;
+    int[] ranks = new int[] {1, 2, 0};
+    compareRanks(hits, ranks);
+  }
+
+  public void testReusableTokenStream() throws Exception {
+    Analyzer a =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    assertAnalyzesTo(
+        a,
+        "please divide into shingles",
+        new String[] {
+          "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    assertAnalyzesTo(
+        a,
+        "divide me up again",
+        new String[] {"divide", "divide me", "me", "me up", "up", "up again", "again"},
+        new int[] {0, 0, 7, 7, 10, 10, 13},
+        new int[] {6, 9, 9, 12, 12, 18, 18},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    a.close();
+  }
+
+  public void testNonDefaultMinShingleSize() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 4);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please",
+          "please divide this",
+          "please divide this sentence",
+          "divide",
+          "divide this sentence",
+          "divide this sentence into",
+          "this",
+          "this sentence into",
+          "this sentence into shingles",
+          "sentence",
+          "sentence into shingles",
+          "into",
+          "shingles"
+        },
+        new int[] {0, 0, 0, 7, 7, 7, 14, 14, 14, 19, 19, 28, 33},
+        new int[] {6, 18, 27, 13, 27, 32, 18, 32, 41, 27, 41, 32, 41},
+        new int[] {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1});
+    analyzer.close();
+
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            3,
+            4,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please divide this", "please divide this sentence",
+          "divide this sentence", "divide this sentence into",
+          "this sentence into", "this sentence into shingles",
+          "sentence into shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {18, 27, 27, 32, 32, 41, 41},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    analyzer.close();
+  }
+
+  public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 3);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please",
+          "please divide this",
+          "divide",
+          "divide this sentence",
+          "this",
+          "this sentence into",
+          "sentence",
+          "sentence into shingles",
+          "into",
+          "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19, 19, 28, 33},
+        new int[] {6, 18, 13, 27, 18, 32, 27, 41, 32, 41},
+        new int[] {1, 0, 1, 0, 1, 0, 1, 0, 1, 1});
+    analyzer.close();
+
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            3,
+            3,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please divide this",
+          "divide this sentence",
+          "this sentence into",
+          "sentence into shingles"
+        },
+        new int[] {0, 7, 14, 19},
+        new int[] {18, 27, 32, 41},
+        new int[] {1, 1, 1, 1});
+    analyzer.close();
+  }
+
+  public void testNoTokenSeparator() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "",
+            true,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {
+          "please", "pleasedivide",
+          "divide", "divideinto",
+          "into", "intoshingles",
+          "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    analyzer.close();
+
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "",
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {"pleasedivide", "divideinto", "intoshingles"},
+        new int[] {0, 7, 14},
+        new int[] {13, 18, 27},
+        new int[] {1, 1, 1});
+    analyzer.close();
+  }
+
+  public void testNullTokenSeparator() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            null,
+            true,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {
+          "please", "pleasedivide",
+          "divide", "divideinto",
+          "into", "intoshingles",
+          "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    analyzer.close();
+
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "",
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {"pleasedivide", "divideinto", "intoshingles"},
+        new int[] {0, 7, 14},
+        new int[] {13, 18, 27},
+        new int[] {1, 1, 1});
+    analyzer.close();
+  }
+
+  public void testAltTokenSeparator() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "<SEP>",
+            true,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(

Review comment:
       *NULL_DEREFERENCE:*  object `analyzer.reuseStrategy` last assigned on line 360 could be null and is dereferenced by call to `assertAnalyzesTo(...)` at line 367.

##########
File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleAnalyzerWrapper.java
##########
@@ -0,0 +1,505 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.shingle;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+
+/** A test class for ShingleAnalyzerWrapper as regards queries and scoring. */
+public class TestShingleAnalyzerWrapper extends BaseTokenStreamTestCase {
+  private Analyzer analyzer;
+  private IndexSearcher searcher;
+  private IndexReader reader;
+  private Directory directory;
+
+  /**
+   * Set up a new index in RAM with three test phrases and the supplied Analyzer.
+   *
+   * @throws Exception if an error occurs with index writer or searcher
+   */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
+
+    Document doc;
+    doc = new Document();
+    doc.add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "just another test sentence", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "a sentence which contains no test", Field.Store.YES));
+    writer.addDocument(doc);
+
+    writer.close();
+
+    reader = DirectoryReader.open(directory);
+    searcher = newSearcher(reader);
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    reader.close();
+    directory.close();
+    analyzer.close();
+    super.tearDown();
+  }
+
+  protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
+    assertEquals(ranks.length, hits.length);
+    for (int i = 0; i < ranks.length; i++) {
+      assertEquals(ranks[i], hits[i].doc);
+    }
+  }
+
+  /*
+   * This shows how to construct a phrase query containing shingles.
+   */
+  public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
+    PhraseQuery.Builder builder = new PhraseQuery.Builder();
+    try (TokenStream ts = analyzer.tokenStream("content", "this sentence")) {
+      int j = -1;
+
+      PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        j += posIncrAtt.getPositionIncrement();
+        String termText = termAtt.toString();
+        builder.add(new Term("content", termText), j);
+      }
+      ts.end();
+    }
+
+    PhraseQuery q = builder.build();
+    ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
+    int[] ranks = new int[] {0};
+    compareRanks(hits, ranks);
+  }
+
+  /*
+   * How to construct a boolean query with shingles. A query like this will
+   * implicitly score those documents higher that contain the words in the query
+   * in the right order and adjacent to each other.
+   */
+  public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
+    BooleanQuery.Builder q = new BooleanQuery.Builder();
+
+    try (TokenStream ts = analyzer.tokenStream("content", "test sentence")) {
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        String termText = termAtt.toString();
+        q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD);
+      }
+      ts.end();
+    }
+
+    ScoreDoc[] hits = searcher.search(q.build(), 1000).scoreDocs;
+    int[] ranks = new int[] {1, 2, 0};
+    compareRanks(hits, ranks);
+  }
+
+  public void testReusableTokenStream() throws Exception {
+    Analyzer a =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    assertAnalyzesTo(
+        a,
+        "please divide into shingles",
+        new String[] {
+          "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    assertAnalyzesTo(
+        a,
+        "divide me up again",
+        new String[] {"divide", "divide me", "me", "me up", "up", "up again", "again"},
+        new int[] {0, 0, 7, 7, 10, 10, 13},
+        new int[] {6, 9, 9, 12, 12, 18, 18},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    a.close();
+  }
+
+  public void testNonDefaultMinShingleSize() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 4);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please",
+          "please divide this",
+          "please divide this sentence",
+          "divide",
+          "divide this sentence",
+          "divide this sentence into",
+          "this",
+          "this sentence into",
+          "this sentence into shingles",
+          "sentence",
+          "sentence into shingles",
+          "into",
+          "shingles"
+        },
+        new int[] {0, 0, 0, 7, 7, 7, 14, 14, 14, 19, 19, 28, 33},
+        new int[] {6, 18, 27, 13, 27, 32, 18, 32, 41, 27, 41, 32, 41},
+        new int[] {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1});
+    analyzer.close();
+
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            3,
+            4,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please divide this", "please divide this sentence",
+          "divide this sentence", "divide this sentence into",
+          "this sentence into", "this sentence into shingles",
+          "sentence into shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {18, 27, 27, 32, 32, 41, 41},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    analyzer.close();
+  }
+
+  public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 3);
+    assertAnalyzesTo(

Review comment:
       *NULL_DEREFERENCE:*  object `analyzer.reuseStrategy` last assigned on line 226 could be null and is dereferenced by call to `assertAnalyzesTo(...)` at line 227.

##########
File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleAnalyzerWrapper.java
##########
@@ -0,0 +1,505 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.shingle;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+
+/** A test class for ShingleAnalyzerWrapper as regards queries and scoring. */
+public class TestShingleAnalyzerWrapper extends BaseTokenStreamTestCase {
+  private Analyzer analyzer;
+  private IndexSearcher searcher;
+  private IndexReader reader;
+  private Directory directory;
+
+  /**
+   * Set up a new index in RAM with three test phrases and the supplied Analyzer.
+   *
+   * @throws Exception if an error occurs with index writer or searcher
+   */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
+
+    Document doc;
+    doc = new Document();
+    doc.add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "just another test sentence", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "a sentence which contains no test", Field.Store.YES));
+    writer.addDocument(doc);
+
+    writer.close();
+
+    reader = DirectoryReader.open(directory);
+    searcher = newSearcher(reader);
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    reader.close();
+    directory.close();
+    analyzer.close();
+    super.tearDown();
+  }
+
+  protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
+    assertEquals(ranks.length, hits.length);
+    for (int i = 0; i < ranks.length; i++) {
+      assertEquals(ranks[i], hits[i].doc);
+    }
+  }
+
+  /*
+   * This shows how to construct a phrase query containing shingles.
+   */
+  public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
+    PhraseQuery.Builder builder = new PhraseQuery.Builder();
+    try (TokenStream ts = analyzer.tokenStream("content", "this sentence")) {
+      int j = -1;
+
+      PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        j += posIncrAtt.getPositionIncrement();
+        String termText = termAtt.toString();
+        builder.add(new Term("content", termText), j);
+      }
+      ts.end();
+    }
+
+    PhraseQuery q = builder.build();
+    ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
+    int[] ranks = new int[] {0};
+    compareRanks(hits, ranks);
+  }
+
+  /*
+   * How to construct a boolean query with shingles. A query like this will
+   * implicitly score those documents higher that contain the words in the query
+   * in the right order and adjacent to each other.
+   */
+  public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
+    BooleanQuery.Builder q = new BooleanQuery.Builder();
+
+    try (TokenStream ts = analyzer.tokenStream("content", "test sentence")) {
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        String termText = termAtt.toString();
+        q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD);
+      }
+      ts.end();
+    }
+
+    ScoreDoc[] hits = searcher.search(q.build(), 1000).scoreDocs;
+    int[] ranks = new int[] {1, 2, 0};
+    compareRanks(hits, ranks);
+  }
+
+  public void testReusableTokenStream() throws Exception {
+    Analyzer a =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    assertAnalyzesTo(
+        a,
+        "please divide into shingles",
+        new String[] {
+          "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    assertAnalyzesTo(
+        a,
+        "divide me up again",
+        new String[] {"divide", "divide me", "me", "me up", "up", "up again", "again"},
+        new int[] {0, 0, 7, 7, 10, 10, 13},
+        new int[] {6, 9, 9, 12, 12, 18, 18},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    a.close();
+  }
+
+  public void testNonDefaultMinShingleSize() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 4);
+    assertAnalyzesTo(

Review comment:
       *NULL_DEREFERENCE:*  object `analyzer.reuseStrategy` last assigned on line 175 could be null and is dereferenced by call to `assertAnalyzesTo(...)` at line 176.

##########
File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleAnalyzerWrapper.java
##########
@@ -0,0 +1,505 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.shingle;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+
+/** A test class for ShingleAnalyzerWrapper as regards queries and scoring. */
+public class TestShingleAnalyzerWrapper extends BaseTokenStreamTestCase {
+  private Analyzer analyzer;
+  private IndexSearcher searcher;
+  private IndexReader reader;
+  private Directory directory;
+
+  /**
+   * Set up a new index in RAM with three test phrases and the supplied Analyzer.
+   *
+   * @throws Exception if an error occurs with index writer or searcher
+   */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
+
+    Document doc;
+    doc = new Document();
+    doc.add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "just another test sentence", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "a sentence which contains no test", Field.Store.YES));
+    writer.addDocument(doc);
+
+    writer.close();
+
+    reader = DirectoryReader.open(directory);
+    searcher = newSearcher(reader);
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    reader.close();
+    directory.close();
+    analyzer.close();
+    super.tearDown();
+  }
+
+  protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
+    assertEquals(ranks.length, hits.length);
+    for (int i = 0; i < ranks.length; i++) {
+      assertEquals(ranks[i], hits[i].doc);
+    }
+  }
+
+  /*
+   * This shows how to construct a phrase query containing shingles.
+   */
+  public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
+    PhraseQuery.Builder builder = new PhraseQuery.Builder();
+    try (TokenStream ts = analyzer.tokenStream("content", "this sentence")) {
+      int j = -1;
+
+      PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        j += posIncrAtt.getPositionIncrement();
+        String termText = termAtt.toString();
+        builder.add(new Term("content", termText), j);
+      }
+      ts.end();
+    }
+
+    PhraseQuery q = builder.build();
+    ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
+    int[] ranks = new int[] {0};
+    compareRanks(hits, ranks);
+  }
+
+  /*
+   * How to construct a boolean query with shingles. A query like this will
+   * implicitly score those documents higher that contain the words in the query
+   * in the right order and adjacent to each other.
+   */
+  public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
+    BooleanQuery.Builder q = new BooleanQuery.Builder();
+
+    try (TokenStream ts = analyzer.tokenStream("content", "test sentence")) {
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        String termText = termAtt.toString();
+        q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD);
+      }
+      ts.end();
+    }
+
+    ScoreDoc[] hits = searcher.search(q.build(), 1000).scoreDocs;
+    int[] ranks = new int[] {1, 2, 0};
+    compareRanks(hits, ranks);
+  }
+
+  public void testReusableTokenStream() throws Exception {
+    Analyzer a =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    assertAnalyzesTo(
+        a,
+        "please divide into shingles",
+        new String[] {
+          "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    assertAnalyzesTo(
+        a,
+        "divide me up again",
+        new String[] {"divide", "divide me", "me", "me up", "up", "up again", "again"},
+        new int[] {0, 0, 7, 7, 10, 10, 13},
+        new int[] {6, 9, 9, 12, 12, 18, 18},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    a.close();
+  }
+
+  public void testNonDefaultMinShingleSize() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 4);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please",
+          "please divide this",
+          "please divide this sentence",
+          "divide",
+          "divide this sentence",
+          "divide this sentence into",
+          "this",
+          "this sentence into",
+          "this sentence into shingles",
+          "sentence",
+          "sentence into shingles",
+          "into",
+          "shingles"
+        },
+        new int[] {0, 0, 0, 7, 7, 7, 14, 14, 14, 19, 19, 28, 33},
+        new int[] {6, 18, 27, 13, 27, 32, 18, 32, 41, 27, 41, 32, 41},
+        new int[] {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1});
+    analyzer.close();
+
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            3,
+            4,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please divide this", "please divide this sentence",
+          "divide this sentence", "divide this sentence into",
+          "this sentence into", "this sentence into shingles",
+          "sentence into shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {18, 27, 27, 32, 32, 41, 41},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    analyzer.close();
+  }
+
+  public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 3);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please",
+          "please divide this",
+          "divide",
+          "divide this sentence",
+          "this",
+          "this sentence into",
+          "sentence",
+          "sentence into shingles",
+          "into",
+          "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19, 19, 28, 33},
+        new int[] {6, 18, 13, 27, 18, 32, 27, 41, 32, 41},
+        new int[] {1, 0, 1, 0, 1, 0, 1, 0, 1, 1});
+    analyzer.close();
+
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            3,
+            3,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please divide this",
+          "divide this sentence",
+          "this sentence into",
+          "sentence into shingles"
+        },
+        new int[] {0, 7, 14, 19},
+        new int[] {18, 27, 32, 41},
+        new int[] {1, 1, 1, 1});
+    analyzer.close();
+  }
+
+  public void testNoTokenSeparator() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "",
+            true,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(

Review comment:
       *NULL_DEREFERENCE:*  object `analyzer.reuseStrategy` last assigned on line 274 could be null and is dereferenced by a call to `assertAnalyzesTo(...)` at line 281.

##########
File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleAnalyzerWrapper.java
##########
@@ -0,0 +1,505 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.shingle;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+
+/** A test class for ShingleAnalyzerWrapper as regards queries and scoring. */
+public class TestShingleAnalyzerWrapper extends BaseTokenStreamTestCase {
+  private Analyzer analyzer;
+  private IndexSearcher searcher;
+  private IndexReader reader;
+  private Directory directory;
+
+  /**
+   * Set up a new index in RAM with three test phrases and the supplied Analyzer.
+   *
+   * @throws Exception if an error occurs with index writer or searcher
+   */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
+
+    Document doc;
+    doc = new Document();
+    doc.add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "just another test sentence", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "a sentence which contains no test", Field.Store.YES));
+    writer.addDocument(doc);
+
+    writer.close();
+
+    reader = DirectoryReader.open(directory);
+    searcher = newSearcher(reader);
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    reader.close();
+    directory.close();
+    analyzer.close();
+    super.tearDown();
+  }
+
+  protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
+    assertEquals(ranks.length, hits.length);
+    for (int i = 0; i < ranks.length; i++) {
+      assertEquals(ranks[i], hits[i].doc);
+    }
+  }
+
+  /*
+   * This shows how to construct a phrase query containing shingles.
+   */
+  public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
+    PhraseQuery.Builder builder = new PhraseQuery.Builder();
+    try (TokenStream ts = analyzer.tokenStream("content", "this sentence")) {
+      int j = -1;
+
+      PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        j += posIncrAtt.getPositionIncrement();
+        String termText = termAtt.toString();
+        builder.add(new Term("content", termText), j);
+      }
+      ts.end();
+    }
+
+    PhraseQuery q = builder.build();
+    ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
+    int[] ranks = new int[] {0};
+    compareRanks(hits, ranks);
+  }
+
+  /*
+   * How to construct a boolean query with shingles. A query like this will
+   * implicitly score those documents higher that contain the words in the query
+   * in the right order and adjacent to each other.
+   */
+  public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
+    BooleanQuery.Builder q = new BooleanQuery.Builder();
+
+    try (TokenStream ts = analyzer.tokenStream("content", "test sentence")) {
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        String termText = termAtt.toString();
+        q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD);
+      }
+      ts.end();
+    }
+
+    ScoreDoc[] hits = searcher.search(q.build(), 1000).scoreDocs;
+    int[] ranks = new int[] {1, 2, 0};
+    compareRanks(hits, ranks);
+  }
+
+  public void testReusableTokenStream() throws Exception {
+    Analyzer a =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    assertAnalyzesTo(
+        a,
+        "please divide into shingles",
+        new String[] {
+          "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    assertAnalyzesTo(
+        a,
+        "divide me up again",
+        new String[] {"divide", "divide me", "me", "me up", "up", "up again", "again"},
+        new int[] {0, 0, 7, 7, 10, 10, 13},
+        new int[] {6, 9, 9, 12, 12, 18, 18},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    a.close();
+  }
+
+  public void testNonDefaultMinShingleSize() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 4);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please",
+          "please divide this",
+          "please divide this sentence",
+          "divide",
+          "divide this sentence",
+          "divide this sentence into",
+          "this",
+          "this sentence into",
+          "this sentence into shingles",
+          "sentence",
+          "sentence into shingles",
+          "into",
+          "shingles"
+        },
+        new int[] {0, 0, 0, 7, 7, 7, 14, 14, 14, 19, 19, 28, 33},
+        new int[] {6, 18, 27, 13, 27, 32, 18, 32, 41, 27, 41, 32, 41},
+        new int[] {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1});
+    analyzer.close();
+
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            3,
+            4,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please divide this", "please divide this sentence",
+          "divide this sentence", "divide this sentence into",
+          "this sentence into", "this sentence into shingles",
+          "sentence into shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {18, 27, 27, 32, 32, 41, 41},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    analyzer.close();
+  }
+
+  public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 3);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please",
+          "please divide this",
+          "divide",
+          "divide this sentence",
+          "this",
+          "this sentence into",
+          "sentence",
+          "sentence into shingles",
+          "into",
+          "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19, 19, 28, 33},
+        new int[] {6, 18, 13, 27, 18, 32, 27, 41, 32, 41},
+        new int[] {1, 0, 1, 0, 1, 0, 1, 0, 1, 1});
+    analyzer.close();
+
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            3,
+            3,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please divide this",
+          "divide this sentence",
+          "this sentence into",
+          "sentence into shingles"
+        },
+        new int[] {0, 7, 14, 19},
+        new int[] {18, 27, 32, 41},
+        new int[] {1, 1, 1, 1});
+    analyzer.close();
+  }
+
+  public void testNoTokenSeparator() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "",
+            true,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {
+          "please", "pleasedivide",
+          "divide", "divideinto",
+          "into", "intoshingles",
+          "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    analyzer.close();
+
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "",
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {"pleasedivide", "divideinto", "intoshingles"},
+        new int[] {0, 7, 14},
+        new int[] {13, 18, 27},
+        new int[] {1, 1, 1});
+    analyzer.close();
+  }
+
+  public void testNullTokenSeparator() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            null,
+            true,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(

Review comment:
       *NULL_DEREFERENCE:*  object `analyzer.reuseStrategy` last assigned on line 317 could be null and is dereferenced by a call to `assertAnalyzesTo(...)` at line 324.

##########
File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleAnalyzerWrapper.java
##########
@@ -0,0 +1,505 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.shingle;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+
+/** A test class for ShingleAnalyzerWrapper as regards queries and scoring. */
+public class TestShingleAnalyzerWrapper extends BaseTokenStreamTestCase {
+  private Analyzer analyzer;
+  private IndexSearcher searcher;
+  private IndexReader reader;
+  private Directory directory;
+
+  /**
+   * Set up a new index in RAM with three test phrases and the supplied Analyzer.
+   *
+   * @throws Exception if an error occurs with index writer or searcher
+   */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
+
+    Document doc;
+    doc = new Document();
+    doc.add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "just another test sentence", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "a sentence which contains no test", Field.Store.YES));
+    writer.addDocument(doc);
+
+    writer.close();
+
+    reader = DirectoryReader.open(directory);
+    searcher = newSearcher(reader);
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    reader.close();
+    directory.close();
+    analyzer.close();
+    super.tearDown();
+  }
+
+  protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
+    assertEquals(ranks.length, hits.length);
+    for (int i = 0; i < ranks.length; i++) {
+      assertEquals(ranks[i], hits[i].doc);
+    }
+  }
+
+  /*
+   * This shows how to construct a phrase query containing shingles.
+   */
+  public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
+    PhraseQuery.Builder builder = new PhraseQuery.Builder();
+    try (TokenStream ts = analyzer.tokenStream("content", "this sentence")) {
+      int j = -1;
+
+      PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        j += posIncrAtt.getPositionIncrement();
+        String termText = termAtt.toString();
+        builder.add(new Term("content", termText), j);
+      }
+      ts.end();
+    }
+
+    PhraseQuery q = builder.build();
+    ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
+    int[] ranks = new int[] {0};
+    compareRanks(hits, ranks);
+  }
+
+  /*
+   * How to construct a boolean query with shingles. A query like this will
+   * implicitly score those documents higher that contain the words in the query
+   * in the right order and adjacent to each other.
+   */
+  public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
+    BooleanQuery.Builder q = new BooleanQuery.Builder();
+
+    try (TokenStream ts = analyzer.tokenStream("content", "test sentence")) {
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        String termText = termAtt.toString();
+        q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD);
+      }
+      ts.end();
+    }
+
+    ScoreDoc[] hits = searcher.search(q.build(), 1000).scoreDocs;
+    int[] ranks = new int[] {1, 2, 0};
+    compareRanks(hits, ranks);
+  }
+
+  public void testReusableTokenStream() throws Exception {
+    Analyzer a =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    assertAnalyzesTo(
+        a,
+        "please divide into shingles",
+        new String[] {
+          "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    assertAnalyzesTo(
+        a,
+        "divide me up again",
+        new String[] {"divide", "divide me", "me", "me up", "up", "up again", "again"},
+        new int[] {0, 0, 7, 7, 10, 10, 13},
+        new int[] {6, 9, 9, 12, 12, 18, 18},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    a.close();
+  }
+
+  public void testNonDefaultMinShingleSize() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 4);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please",
+          "please divide this",
+          "please divide this sentence",
+          "divide",
+          "divide this sentence",
+          "divide this sentence into",
+          "this",
+          "this sentence into",
+          "this sentence into shingles",
+          "sentence",
+          "sentence into shingles",
+          "into",
+          "shingles"
+        },
+        new int[] {0, 0, 0, 7, 7, 7, 14, 14, 14, 19, 19, 28, 33},
+        new int[] {6, 18, 27, 13, 27, 32, 18, 32, 41, 27, 41, 32, 41},
+        new int[] {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1});
+    analyzer.close();
+
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            3,
+            4,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please divide this", "please divide this sentence",
+          "divide this sentence", "divide this sentence into",
+          "this sentence into", "this sentence into shingles",
+          "sentence into shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {18, 27, 27, 32, 32, 41, 41},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    analyzer.close();
+  }
+
+  /**
+   * Verifies shingle output when minShingleSize == maxShingleSize (both 3): only
+   * tri-gram shingles are produced, first with unigrams interleaved (default
+   * constructor keeps unigram output on) and then with unigram output disabled.
+   */
+  public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
+    // Default wrapper constructor: unigrams are emitted alongside the 3-shingles.
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 3);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please",
+          "please divide this",
+          "divide",
+          "divide this sentence",
+          "this",
+          "this sentence into",
+          "sentence",
+          "sentence into shingles",
+          "into",
+          "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19, 19, 28, 33},
+        new int[] {6, 18, 13, 27, 18, 32, 27, 41, 32, 41},
+        // posInc 0 marks a shingle stacked on the unigram at the same position.
+        new int[] {1, 0, 1, 0, 1, 0, 1, 0, 1, 1});
+    analyzer.close();
+
+    // Same sizes, but outputUnigrams=false: only the four 3-shingles remain.
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            3,
+            3,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide this sentence into shingles",
+        new String[] {
+          "please divide this",
+          "divide this sentence",
+          "this sentence into",
+          "sentence into shingles"
+        },
+        new int[] {0, 7, 14, 19},
+        new int[] {18, 27, 32, 41},
+        new int[] {1, 1, 1, 1});
+    analyzer.close();
+  }
+
+  /**
+   * Verifies that an empty token separator ("") concatenates shingle components
+   * with no delimiter, both with and without unigram output.
+   */
+  public void testNoTokenSeparator() throws Exception {
+    // Empty separator, unigrams on: "please" + "divide" -> "pleasedivide", etc.
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "",
+            true,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {
+          "please", "pleasedivide",
+          "divide", "divideinto",
+          "into", "intoshingles",
+          "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    analyzer.close();
+
+    // Empty separator, unigrams off: only the concatenated bigrams remain.
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "",
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {"pleasedivide", "divideinto", "intoshingles"},
+        new int[] {0, 7, 14},
+        new int[] {13, 18, 27},
+        new int[] {1, 1, 1});
+    analyzer.close();
+  }
+
+  /**
+   * Verifies that a null token separator behaves the same as an empty separator:
+   * shingle components are concatenated with no delimiter.
+   */
+  public void testNullTokenSeparator() throws Exception {
+    // null separator, unigrams on — output matches the empty-separator case.
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            null,
+            true,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {
+          "please", "pleasedivide",
+          "divide", "divideinto",
+          "into", "intoshingles",
+          "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    analyzer.close();
+
+    // NOTE(review): this second analyzer passes "" rather than null, unlike the
+    // first half of this test (and duplicating testNoTokenSeparator's second
+    // case) — looks like a copy-paste; confirm it was meant to pass null here.
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "",
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {"pleasedivide", "divideinto", "intoshingles"},
+        new int[] {0, 7, 14},
+        new int[] {13, 18, 27},
+        new int[] {1, 1, 1});
+    analyzer.close();
+  }
+
+  /**
+   * Verifies that a custom multi-character token separator ("&lt;SEP&gt;") is inserted
+   * between shingle components, with and without unigram output.
+   */
+  public void testAltTokenSeparator() throws Exception {
+    // Custom separator, unigrams on: bigrams joined by "<SEP>".
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "<SEP>",
+            true,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {
+          "please", "please<SEP>divide",
+          "divide", "divide<SEP>into",
+          "into", "into<SEP>shingles",
+          "shingles"
+        },
+        new int[] {0, 0, 7, 7, 14, 14, 19},
+        new int[] {6, 13, 13, 18, 18, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 0, 1});
+    analyzer.close();
+
+    // Custom separator, unigrams off: only the "<SEP>"-joined bigrams remain.
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "<SEP>",
+            false,
+            false,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {"please<SEP>divide", "divide<SEP>into", "into<SEP>shingles"},
+        new int[] {0, 7, 14},
+        new int[] {13, 18, 27},
+        new int[] {1, 1, 1});
+    analyzer.close();
+  }
+
+  /**
+   * Verifies the filler token that stands in for stop-word position gaps: a
+   * custom filler ("--"), a null filler, and an empty-string filler. The
+   * delegate analyzer removes "into" with a StopFilter, leaving a position hole
+   * that the shingle filter fills.
+   */
+  public void testAltFillerToken() throws Exception {
+    // Delegate that drops the stop word "into", creating a position gap.
+    Analyzer delegate =
+        new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName) {
+            CharArraySet stopSet = StopFilter.makeStopSet("into");
+            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+            TokenFilter filter = new StopFilter(tokenizer, stopSet);
+            return new TokenStreamComponents(tokenizer, filter);
+          }
+        };
+
+    // Custom filler "--": the removed "into" appears as "--" inside shingles.
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            delegate,
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            true,
+            false,
+            "--");
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {
+          "please", "please divide",
+          "divide", "divide --",
+          "-- shingles", "shingles"
+        },
+        new int[] {0, 0, 7, 7, 19, 19},
+        new int[] {6, 13, 13, 19, 27, 27},
+        new int[] {1, 0, 1, 0, 1, 1});
+    analyzer.close();
+
+    // NOTE(review): the same anonymous delegate is rebuilt identically three
+    // times in this test; presumably because each wrapper close() also closes
+    // its delegate — confirm before deduplicating.
+    delegate =
+        new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName) {
+            CharArraySet stopSet = StopFilter.makeStopSet("into");
+            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+            TokenFilter filter = new StopFilter(tokenizer, stopSet);
+            return new TokenStreamComponents(tokenizer, filter);
+          }
+        };
+    // Null filler: the gap contributes an empty component, so shingles keep the
+    // separator but no filler text ("divide " / " shingles").
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            delegate,
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            false,
+            false,
+            null);
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {"please divide", "divide ", " shingles"},
+        new int[] {0, 7, 19},
+        new int[] {13, 19, 27},
+        new int[] {1, 1, 1});
+    analyzer.close();
+
+    delegate =
+        new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName) {
+            CharArraySet stopSet = StopFilter.makeStopSet("into");
+            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+            TokenFilter filter = new StopFilter(tokenizer, stopSet);
+            return new TokenStreamComponents(tokenizer, filter);
+          }
+        };
+    // Empty-string filler: expected output matches the null-filler case.
+    analyzer =
+        new ShingleAnalyzerWrapper(
+            delegate,
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
+            false,
+            false,
+            "");
+    assertAnalyzesTo(
+        analyzer,
+        "please divide into shingles",
+        new String[] {"please divide", "divide ", " shingles"},
+        new int[] {0, 7, 19},
+        new int[] {13, 19, 27},
+        new int[] {1, 1, 1});
+    analyzer.close();
+  }
+
+  public void testOutputUnigramsIfNoShinglesSingleToken() throws Exception {
+    ShingleAnalyzerWrapper analyzer =
+        new ShingleAnalyzerWrapper(
+            new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
+            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+            "",
+            false,
+            true,
+            ShingleFilter.DEFAULT_FILLER_TOKEN);
+    assertAnalyzesTo(

Review comment:
       *NULL_DEREFERENCE:*  object `analyzer.reuseStrategy` last assigned on line 494 could be null and is dereferenced by call to `assertAnalyzesTo(...)` at line 501.

##########
File path: lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleAnalyzerWrapper.java
##########
@@ -0,0 +1,505 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.shingle;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+
+/** A test class for ShingleAnalyzerWrapper as regards queries and scoring. */
+public class TestShingleAnalyzerWrapper extends BaseTokenStreamTestCase {
+  private Analyzer analyzer;
+  private IndexSearcher searcher;
+  private IndexReader reader;
+  private Directory directory;
+
+  /**
+   * Sets up a new index in a fresh Directory with three test phrases, analyzed
+   * by a {@link ShingleAnalyzerWrapper} (max shingle size 2) over a
+   * whitespace MockAnalyzer, then opens a reader and searcher over it.
+   *
+   * @throws Exception if an error occurs with index writer or searcher
+   */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    directory = newDirectory();
+    // NOTE(review): writer is closed explicitly below, but an exception while
+    // adding documents would leak it — consider try-with-resources.
+    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
+
+    Document doc;
+    doc = new Document();
+    doc.add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "just another test sentence", Field.Store.YES));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("content", "a sentence which contains no test", Field.Store.YES));
+    writer.addDocument(doc);
+
+    writer.close();
+
+    reader = DirectoryReader.open(directory);
+    searcher = newSearcher(reader);
+  }
+
+  /** Releases the reader, directory, and analyzer opened in {@link #setUp()}. */
+  @Override
+  public void tearDown() throws Exception {
+    reader.close();
+    directory.close();
+    analyzer.close();
+    super.tearDown();
+  }
+
+  /**
+   * Asserts that the search hits come back in exactly the expected order.
+   *
+   * @param hits the score docs returned by the searcher
+   * @param ranks expected doc ids, in expected rank order
+   */
+  protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
+    assertEquals(ranks.length, hits.length);
+    for (int i = 0; i < ranks.length; i++) {
+      assertEquals(ranks[i], hits[i].doc);
+    }
+  }
+
+  /*
+   * This shows how to construct a phrase query containing shingles: each token
+   * (unigram or shingle) emitted by the analyzer is added to the phrase at the
+   * position derived from its position increment.
+   */
+  public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
+    PhraseQuery.Builder builder = new PhraseQuery.Builder();
+    try (TokenStream ts = analyzer.tokenStream("content", "this sentence")) {
+      // Start at -1 so the first token's increment (>= 1) lands at position 0.
+      int j = -1;
+
+      PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      while (ts.incrementToken()) {
+        j += posIncrAtt.getPositionIncrement();
+        String termText = termAtt.toString();
+        builder.add(new Term("content", termText), j);
+      }
+      ts.end();
+    }
+
+    PhraseQuery q = builder.build();
+    ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
+    // Only doc 0 ("please divide this sentence into shingles") should match.
+    int[] ranks = new int[] {0};
+    compareRanks(hits, ranks);
+  }
+
+  /*
+   * How to construct a boolean query with shingles. A query like this will
+   * implicitly score those documents higher that contain the words in the query
+   * in the right order and adjacent to each other, because adjacent words also
+   * match the shingle terms.
+   */
+  public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
+    BooleanQuery.Builder q = new BooleanQuery.Builder();
+
+    try (TokenStream ts = analyzer.tokenStream("content", "test sentence")) {
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+
+      ts.reset();
+      // Add every emitted token (unigrams and shingles) as an optional clause.
+      while (ts.incrementToken()) {
+        String termText = termAtt.toString();
+        q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD);
+      }
+      ts.end();
+    }
+
+    ScoreDoc[] hits = searcher.search(q.build(), 1000).scoreDocs;
+    // Doc 1 contains the adjacent phrase "test sentence" and ranks first.
+    int[] ranks = new int[] {1, 2, 0};
+    compareRanks(hits, ranks);
+  }
+
+  public void testReusableTokenStream() throws Exception {
+    Analyzer a =
+        new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
+    assertAnalyzesTo(

Review comment:
       *NULL_DEREFERENCE:*  object `a.reuseStrategy` last assigned on line 152 could be null and is dereferenced by call to `assertAnalyzesTo(...)` at line 153.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@lucene.apache.org
For additional commands, e-mail: issues-help@lucene.apache.org