You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@solr.apache.org by gnandre <ar...@gmail.com> on 2023/06/13 03:06:24 UTC

java.lang.NullPointerException: stopWords - solr 9.2.1

Hi,

I am using Solr 9.2.1 (official docker image).

When I try to index a document, I get the error shown at the bottom of this
email.

Here is my corresponding filter setting

  <fieldType name="url_path_text" class="solr.TextField">
    <analyzer type="index">
      <tokenizer name="pattern" pattern=
"https?:\/\/((www[^\.]+)|(www.))?|\/([^/]\.[^/]$)?|\.?mat\.[^/]+|[?#].*$"
group="-1"/>
      <filter name="wordDelimiterGraph" protected="protect.txt"
preserveOriginal="1"  generateWordParts="1" generateNumberParts="1"
catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"
/>
      <filter name="flattenGraph"/>
      <filter name="stop" format="snowball" ignoreCase="true" words=
"stopwords.txt"/>
      <filter name="lowercase"/>
      <filter name="icuNormalizer2"/>
      <filter name="snowballPorter" protected="protwords.txt" language=
"English"/>
      <filter name="removeDuplicates"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer name="whitespace"/>
      <filter name="stop" format="snowball" ignoreCase="true" words=
"stopwords.txt"/>
      <filter name="wordDelimiterGraph" protected="protect.txt"
preserveOriginal="1"  generateWordParts="1" generateNumberParts="1"
catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"
/>
      <filter name="lowercase"/>
      <filter name="icuNormalizer2"/>
      <filter name="snowballPorter" protected="protwords.txt" language=
"English"/>
      <filter name="removeDuplicates"/>
    </analyzer>
  </fieldType>

What am I missing?

development-environment-solr-9-1  | 2023-06-13 02:58:09.441 ERROR
(qtp479459041-20) [ x:documentation] o.a.s.h.RequestHandlerBase
org.apache.solr.common.SolrException: Exception writing document id
doc:en:doc1 to the index; possible analysis error. =>
org.apache.solr.common.SolrException: Exception writing document id
doc:en:doc1 to the index; possible analysis error.
development-environment-solr-9-1  |     at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:323)
development-environment-solr-9-1  | org.apache.solr.common.SolrException:
Exception writing document id doc:en:doc1 to the index; possible analysis
error.
development-environment-solr-9-1  |     at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:323)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.processor.RunUpdateProcessorFactory$RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:76)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:270)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.processor.DistributedUpdateProcessor.doVersionAdd(DistributedUpdateProcessor.java:545)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.processor.DistributedUpdateProcessor.lambda$versionAdd$0(DistributedUpdateProcessor.java:357)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.VersionBucket.runWithLock(VersionBucket.java:51)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:354)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:236)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.processor.LogUpdateProcessorFactory$LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:111)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.handler.loader.JavabinLoader$1.update(JavabinLoader.java:123)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$StreamingCodec.readOuterMostDocIterator(JavaBinUpdateRequestCodec.java:342)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$StreamingCodec.readIterator(JavaBinUpdateRequestCodec.java:286)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:338)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:283)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$StreamingCodec.readNamedList(JavaBinUpdateRequestCodec.java:236)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:303)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:283)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.common.util.JavaBinCodec.unmarshal(JavaBinCodec.java:193)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.unmarshal(JavaBinUpdateRequestCodec.java:126)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.handler.loader.JavabinLoader.parseAndLoadDocs(JavabinLoader.java:135)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.handler.loader.JavabinLoader.load(JavabinLoader.java:74)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:101)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:84)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:224)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.core.SolrCore.execute(SolrCore.java:2890) ~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.servlet.HttpSolrCall.executeCoreRequest(HttpSolrCall.java:872)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:568) ~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.servlet.SolrDispatchFilter.dispatch(SolrDispatchFilter.java:252)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.servlet.SolrDispatchFilter.lambda$doFilter$0(SolrDispatchFilter.java:220)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.servlet.ServletUtils.traceHttpRequestExecution2(ServletUtils.java:257)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.servlet.ServletUtils.rateLimitRequest(ServletUtils.java:227)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:215)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:197)
~[?:?]
development-environment-solr-9-1  |     at
org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:210)
~[jetty-servlet-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1635)
~[jetty-servlet-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:527)
~[jetty-servlet-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:131)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:578)
~[jetty-security-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:122)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:223)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1570)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:221)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1383)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:176)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:484)
~[jetty-servlet-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1543)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:174)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1305)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:129)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:149)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.InetAccessHandler.handle(InetAccessHandler.java:228)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:141)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:122)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:301)
~[jetty-rewrite-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:122)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:822)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:122)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.Server.handle(Server.java:563)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.HttpChannel.lambda$handle$0(HttpChannel.java:505)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:762)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:497)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:282)
~[jetty-server-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:314)
~[jetty-io-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:100)
~[jetty-io-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.io.SelectableChannelEndPoint$1.run(SelectableChannelEndPoint.java:53)
~[jetty-io-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.util.thread.strategy.AdaptiveExecutionStrategy.runTask(AdaptiveExecutionStrategy.java:416)
~[jetty-util-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.util.thread.strategy.AdaptiveExecutionStrategy.consumeTask(AdaptiveExecutionStrategy.java:385)
~[jetty-util-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.util.thread.strategy.AdaptiveExecutionStrategy.tryProduce(AdaptiveExecutionStrategy.java:272)
~[jetty-util-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.util.thread.strategy.AdaptiveExecutionStrategy.lambda$new$0(AdaptiveExecutionStrategy.java:140)
~[jetty-util-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:411)
~[jetty-util-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:934)
~[jetty-util-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at
org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1078)
~[jetty-util-10.0.13.jar:10.0.13]
development-environment-solr-9-1  |     at java.lang.Thread.run(Unknown
Source) [?:?]
development-environment-solr-9-1  | Caused by:
java.lang.NullPointerException: stopWords
development-environment-solr-9-1  |     at
java.util.Objects.requireNonNull(Unknown Source) ~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.analysis.StopFilter.<init>(StopFilter.java:39) ~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.analysis.core.StopFilter.<init>(StopFilter.java:43) ~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.analysis.core.StopFilterFactory.create(StopFilterFactory.java:91)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.analysis.TokenizerChain.createComponents(TokenizerChain.java:132)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.analysis.AnalyzerWrapper.createComponents(AnalyzerWrapper.java:120)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.analysis.AnalyzerWrapper.createComponents(AnalyzerWrapper.java:120)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.analysis.Analyzer.tokenStream(Analyzer.java:193) ~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.document.Field.tokenStream(Field.java:486) ~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.index.IndexingChain$PerField.invert(IndexingChain.java:1103)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.index.IndexingChain.processField(IndexingChain.java:681)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.index.IndexingChain.processDocument(IndexingChain.java:566)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.index.DocumentsWriterPerThread.updateDocuments(DocumentsWriterPerThread.java:241)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.index.DocumentsWriter.updateDocuments(DocumentsWriter.java:432)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.index.IndexWriter.updateDocuments(IndexWriter.java:1533)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.lucene.index.IndexWriter.updateDocuments(IndexWriter.java:1522)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.DirectUpdateHandler2.updateDocOrDocValues(DirectUpdateHandler2.java:1050)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.DirectUpdateHandler2.doNormalUpdate(DirectUpdateHandler2.java:409)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:362)
~[?:?]
development-environment-solr-9-1  |     at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:299)
~[?:?]
development-environment-solr-9-1  |     ... 73 more

Re: java.lang.NullPointerException: stopWords - solr 9.2.1

Posted by Shawn Heisey <ap...@elyograg.org>.
On 6/12/23 21:06, gnandre wrote:
> Hi,
> 
> I am using Solr 9.2.1 (official docker image).
> 
> When I try to index a document, I get the error shown at the bottom of this
> email.

I added your fieldType to a config, added a field using that type, 
uploaded it to ZK, created a collection with that config, and then 
indexed a document that included that new field.

It worked without issues.  My version info (built from source, branch_9x):

solr-spec
9.3.0
solr-impl
9.3.0-SNAPSHOT 555cb35480ec34caca04903e440a2c7b336346ad [snapshot build, 
details omitted]
lucene-spec
9.5.0
lucene-impl
9.5.0 13803aa6ea7fee91f798cfeded4296182ac43a21 - 2023-01-25 16:44:59

An FYI:  You should not use a stopword filter.  In days long past, 
stopword removal caused a SIGNIFICANT increase in search performance. 
But it came at a high price ... certain queries do not work well when 
stopwords are removed.  The classic example of a query that stopwords 
break is "to be or not to be".  But I have a relevant one for more 
recent times:  "the who"

These days, system capacities are a lot better than they were in those 
days, so stopword removal does not offer as much of a performance boost.
Most people who are familiar with search technology feel that the 
reduction in query correctness is not worth the performance gain that 
has steadily dwindled over the years.

Your stopword list is particularly long.  Any query using any of the 
words in that list will NOT function correctly ... and with the long 
list you've got, that's a LOT of words that won't work.

What is this text "development-environment-solr-9-1" that is scattered 
throughout the error message you pasted?  I have never seen anything 
like that before.

I am not familiar with creating a schema using values like:

<filter name="lowercase"/>

So I do not know how to spot problems with that kind of schema.  This is 
what a complex fieldType looks like in my schema:

   <fieldType name="text" class="solr.TextField" 
autoGeneratePhraseQueries="true" positionIncrementGap="100">
     <analyzer type="index">
       <tokenizer class="solr.ICUTokenizerFactory"/>
       <filter class="solr.ICUFoldingFilterFactory"/>
       <filter class="solr.KeywordMarkerFilterFactory" 
protected="protwords.txt"/>
       <filter class="solr.WordDelimiterGraphFilterFactory" 
catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" 
generateWordParts="1" splitOnNumerics="1" catenateAll="1" 
catenateWords="1"/>
       <filter class="solr.FlattenGraphFilterFactory"/>
       <filter class="solr.PorterStemFilterFactory"/>
     </analyzer>
     <analyzer type="query">
       <tokenizer class="solr.ICUTokenizerFactory"/>
       <filter class="solr.ICUFoldingFilterFactory"/>
       <filter class="solr.KeywordMarkerFilterFactory" 
protected="protwords.txt"/>
       <filter class="solr.SynonymGraphFilterFactory" expand="true" 
ignoreCase="true" synonyms="synonyms.txt"/>
       <filter class="solr.WordDelimiterGraphFilterFactory" 
catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" 
generateWordParts="1" splitOnNumerics="1" catenateAll="1" 
catenateWords="1"/>
       <filter class="solr.PorterStemFilterFactory"/>
     </analyzer>
   </fieldType>

It's worth noting that my synonym list is the one that comes with Solr. 
I only left the synonym filter there in the event that some future 
version of me decides I want to use a synonym.

#-----------------------------------------------------------------------
#some test synonym mappings unlikely to appear in real input text
aaafoo => aaabar
bbbfoo => bbbfoo bbbbar
cccfoo => cccbar cccbaz
fooaaa,baraaa,bazaaa

# Some synonym groups specific to this example
GB,gib,gigabyte,gigabytes
MB,mib,megabyte,megabytes
Television, Televisions, TV, TVs
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
#after us won't split it into two words.

# Synonym mappings can be used for spelling correction too
pixima => pixma
#-----------------------------------------------------------------------

Thanks,
Shawn

Re: java.lang.NullPointerException: stopWords - solr 9.2.1

Posted by gnandre <ar...@gmail.com>.
Here is the corresponding stopwords.txt file content:

\#  Licensed to the Apache Software Foundation (ASF) under one or more
\#  contributor license agreements.  See the NOTICE file distributed with
\#  this work for additional information regarding copyright ownership.
\#  The ASF licenses this file to You under the Apache License, Version 2.0
\#  (the "License"); you may not use this file except in compliance with
\#  the License.  You may obtain a copy of the License at
\#
\#      http://www.apache.org/licenses/LICENSE-2.0
\#
\#  Unless required by applicable law or agreed to in writing, software
\#  distributed under the License is distributed on an "AS IS" BASIS,
\#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\#  See the License for the specific language governing permissions and
\#  limitations under the License.

\# -----------------------------------------------------------------------
\#  a couple of test stopwords to test that the words are really being
\#  configured from this file:
stopworda
stopwordb

\# Standard english stop words taken from Lucene's StopAnalyzer
a
an
\# and
are
as
at
be
but
by
\# for
\# if
in
into
\# is
it
no
\# not
of
on
\# or
s
such
t
that
the
their
then
there
these
they
this
to
was
will
with


On Mon, Jun 12, 2023 at 11:06 PM gnandre <ar...@gmail.com> wrote:

> Hi,
>
> I am using Solr 9.2.1 (official docker image).
>
> When I try to index a document, I get the error shown at the bottom of
> this email.
>
> Here is my corresponding filter setting
>
>   <fieldType name="url_path_text" class="solr.TextField">
>     <analyzer type="index">
>       <tokenizer name="pattern" pattern=
> "https?:\/\/((www[^\.]+)|(www.))?|\/([^/]\.[^/]$)?|\.?mat\.[^/]+|[?#].*$"
> group="-1"/>
>       <filter name="wordDelimiterGraph" protected="protect.txt"
> preserveOriginal="1"  generateWordParts="1" generateNumberParts="1"
> catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange=
> "1"/>
>       <filter name="flattenGraph"/>
>       <filter name="stop" format="snowball" ignoreCase="true" words=
> "stopwords.txt"/>
>       <filter name="lowercase"/>
>       <filter name="icuNormalizer2"/>
>       <filter name="snowballPorter" protected="protwords.txt" language=
> "English"/>
>       <filter name="removeDuplicates"/>
>     </analyzer>
>     <analyzer type="query">
>       <tokenizer name="whitespace"/>
>       <filter name="stop" format="snowball" ignoreCase="true" words=
> "stopwords.txt"/>
>       <filter name="wordDelimiterGraph" protected="protect.txt"
> preserveOriginal="1"  generateWordParts="1" generateNumberParts="1"
> catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange=
> "1"/>
>       <filter name="lowercase"/>
>       <filter name="icuNormalizer2"/>
>       <filter name="snowballPorter" protected="protwords.txt" language=
> "English"/>
>       <filter name="removeDuplicates"/>
>     </analyzer>
>   </fieldType>
>
> What am I missing?
>
> development-environment-solr-9-1  | 2023-06-13 02:58:09.441 ERROR
> (qtp479459041-20) [ x:documentation] o.a.s.h.RequestHandlerBase
> org.apache.solr.common.SolrException: Exception writing document id
> doc:en:doc1 to the index; possible analysis error. =>
> org.apache.solr.common.SolrException: Exception writing document id
> doc:en:doc1 to the index; possible analysis error.
> development-environment-solr-9-1  |     at
> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:323)
> development-environment-solr-9-1  | org.apache.solr.common.SolrException:
> Exception writing document id doc:en:doc1 to the index; possible analysis
> error.
> development-environment-solr-9-1  |     at
> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:323)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.processor.RunUpdateProcessorFactory$RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:76)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:270)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.processor.DistributedUpdateProcessor.doVersionAdd(DistributedUpdateProcessor.java:545)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.processor.DistributedUpdateProcessor.lambda$versionAdd$0(DistributedUpdateProcessor.java:357)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.VersionBucket.runWithLock(VersionBucket.java:51)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:354)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:236)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.processor.LogUpdateProcessorFactory$LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:111)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.handler.loader.JavabinLoader$1.update(JavabinLoader.java:123)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$StreamingCodec.readOuterMostDocIterator(JavaBinUpdateRequestCodec.java:342)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$StreamingCodec.readIterator(JavaBinUpdateRequestCodec.java:286)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:338)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:283)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$StreamingCodec.readNamedList(JavaBinUpdateRequestCodec.java:236)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:303)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:283)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.common.util.JavaBinCodec.unmarshal(JavaBinCodec.java:193)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.unmarshal(JavaBinUpdateRequestCodec.java:126)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.handler.loader.JavabinLoader.parseAndLoadDocs(JavabinLoader.java:135)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.handler.loader.JavabinLoader.load(JavabinLoader.java:74)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:101)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:84)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:224)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.core.SolrCore.execute(SolrCore.java:2890) ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.servlet.HttpSolrCall.executeCoreRequest(HttpSolrCall.java:872)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:568) ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.servlet.SolrDispatchFilter.dispatch(SolrDispatchFilter.java:252)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.servlet.SolrDispatchFilter.lambda$doFilter$0(SolrDispatchFilter.java:220)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.servlet.ServletUtils.traceHttpRequestExecution2(ServletUtils.java:257)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.servlet.ServletUtils.rateLimitRequest(ServletUtils.java:227)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:215)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:197)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:210)
> ~[jetty-servlet-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1635)
> ~[jetty-servlet-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:527)
> ~[jetty-servlet-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:131)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:578)
> ~[jetty-security-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:122)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:223)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1570)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:221)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1383)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:176)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:484)
> ~[jetty-servlet-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1543)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:174)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1305)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:129)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:149)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.InetAccessHandler.handle(InetAccessHandler.java:228)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:141)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:122)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:301)
> ~[jetty-rewrite-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:122)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:822)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:122)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.Server.handle(Server.java:563)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.HttpChannel.lambda$handle$0(HttpChannel.java:505)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:762)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:497)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:282)
> ~[jetty-server-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:314)
> ~[jetty-io-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:100)
> ~[jetty-io-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.io.SelectableChannelEndPoint$1.run(SelectableChannelEndPoint.java:53)
> ~[jetty-io-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.util.thread.strategy.AdaptiveExecutionStrategy.runTask(AdaptiveExecutionStrategy.java:416)
> ~[jetty-util-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.util.thread.strategy.AdaptiveExecutionStrategy.consumeTask(AdaptiveExecutionStrategy.java:385)
> ~[jetty-util-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.util.thread.strategy.AdaptiveExecutionStrategy.tryProduce(AdaptiveExecutionStrategy.java:272)
> ~[jetty-util-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.util.thread.strategy.AdaptiveExecutionStrategy.lambda$new$0(AdaptiveExecutionStrategy.java:140)
> ~[jetty-util-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:411)
> ~[jetty-util-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:934)
> ~[jetty-util-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at
> org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1078)
> ~[jetty-util-10.0.13.jar:10.0.13]
> development-environment-solr-9-1  |     at java.lang.Thread.run(Unknown Source) [?:?]
> development-environment-solr-9-1  | Caused by: java.lang.NullPointerException: stopWords
> development-environment-solr-9-1  |     at
> java.util.Objects.requireNonNull(Unknown Source) ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.analysis.StopFilter.<init>(StopFilter.java:39) ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.analysis.core.StopFilter.<init>(StopFilter.java:43) ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.analysis.core.StopFilterFactory.create(StopFilterFactory.java:91)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.analysis.TokenizerChain.createComponents(TokenizerChain.java:132)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.analysis.AnalyzerWrapper.createComponents(AnalyzerWrapper.java:120)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.analysis.AnalyzerWrapper.createComponents(AnalyzerWrapper.java:120)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.analysis.Analyzer.tokenStream(Analyzer.java:193) ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.document.Field.tokenStream(Field.java:486) ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.index.IndexingChain$PerField.invert(IndexingChain.java:1103)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.index.IndexingChain.processField(IndexingChain.java:681)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.index.IndexingChain.processDocument(IndexingChain.java:566)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.index.DocumentsWriterPerThread.updateDocuments(DocumentsWriterPerThread.java:241)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.index.DocumentsWriter.updateDocuments(DocumentsWriter.java:432)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.index.IndexWriter.updateDocuments(IndexWriter.java:1533)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.lucene.index.IndexWriter.updateDocuments(IndexWriter.java:1522)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.DirectUpdateHandler2.updateDocOrDocValues(DirectUpdateHandler2.java:1050)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.DirectUpdateHandler2.doNormalUpdate(DirectUpdateHandler2.java:409)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:362)
> ~[?:?]
> development-environment-solr-9-1  |     at
> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:299)
> ~[?:?]
> development-environment-solr-9-1  |     ... 73 more
>