You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@solr.apache.org by Furkan KAMACI <fu...@gmail.com> on 2021/04/05 08:53:58 UTC

Solr OOM when Indexing Small File at Large Index

I have Solr instance which is not SolrCloud and has a version of 7.6.0 with
a large index > 500GB

Config is as follows:

solr.autoCommit.maxTime=10000
solr.autoSoftCommit.maxTime=30000

Memory on the machine is 40 GiB which is fully consumed by other processes
too.
Solr runs with 24 GB memory.

Solr runs as a docker container.

I get an IndexWriter AlreadyClosedException and OOM when indexing a ~KB
size file.

What can be the reason:

*org.apache.solr.common.SolrException: Server error writing document id *
file://///10.10.10.10/C/abc.docx to the index
at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:240)
at
org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:67)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:961)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1177)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:653)
at
org.apache.solr.update.processor.LogUpdateProcessorFactory$LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:103)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at
org.apache.solr.update.processor.LanguageIdentifierUpdateProcessor.processAdd(LanguageIdentifierUpdateProcessor.java:206)
at
org.apache.solr.update.processor.SignatureUpdateProcessorFactory$SignatureUpdateProcessor.processAdd(SignatureUpdateProcessorFactory.java:196)
at
org.apache.solr.handler.extraction.ExtractingDocumentLoader.doAdd(ExtractingDocumentLoader.java:126)
at
org.apache.solr.handler.extraction.ExtractingDocumentLoader.addDoc(ExtractingDocumentLoader.java:131)
at
org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:237)
at
org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
at
org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:199)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:2541)
at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:709)
at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:515)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:377)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:323)
at
org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1634)
at
org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:146)
at
org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:257)
at
org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:255)
at
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1317)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
at
org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)
at
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1219)
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)
at
org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
at
org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at
org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.server.Server.handle(Server.java:531)
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:352)
at
org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:260)
at
org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:281)
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:118)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126)
at
org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366)
at
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:762)
at
org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:680)
at java.lang.Thread.run(Thread.java:823)
*Caused by: org.apache.lucene.store.AlreadyClosedException: this
IndexWriter is closed*
at org.apache.lucene.index.IndexWriter.ensureOpen(IndexWriter.java:679)
at org.apache.lucene.index.IndexWriter.ensureOpen(IndexWriter.java:693)
at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1606)
at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1601)
at
org.apache.solr.update.DirectUpdateHandler2.updateDocOrDocValues(DirectUpdateHandler2.java:964)
at
org.apache.solr.update.DirectUpdateHandler2.doNormalUpdate(DirectUpdateHandler2.java:341)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:288)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:235)
... 54 more
*Caused by: java.lang.OutOfMemoryError: Java heap space*
at java.lang.StringBuilder.ensureCapacityImpl(StringBuilder.java:802)
at java.lang.StringBuilder.append(StringBuilder.java:285)
at
org.apache.lucene.analysis.pattern.PatternReplaceCharFilter.fill(PatternReplaceCharFilter.java:75)
at
org.apache.lucene.analysis.pattern.PatternReplaceCharFilter.read(PatternReplaceCharFilter.java:65)
at
org.apache.lucene.analysis.standard.ClassicTokenizerImpl.zzRefill(ClassicTokenizerImpl.java:442)
at
org.apache.lucene.analysis.standard.ClassicTokenizerImpl.getNextToken(ClassicTokenizerImpl.java:652)
at
org.apache.lucene.analysis.standard.ClassicTokenizer.incrementToken(ClassicTokenizer.java:136)
at
org.apache.lucene.analysis.FilteringTokenFilter.incrementToken(FilteringTokenFilter.java:49)
at
org.apache.lucene.analysis.pattern.PatternReplaceFilter.incrementToken(PatternReplaceFilter.java:68)
at
org.apache.lucene.analysis.pattern.PatternReplaceFilter.incrementToken(PatternReplaceFilter.java:68)
at
org.apache.lucene.analysis.pattern.PatternReplaceFilter.incrementToken(PatternReplaceFilter.java:68)
at
org.apache.lucene.analysis.FilteringTokenFilter.incrementToken(FilteringTokenFilter.java:49)
at
org.apache.lucene.analysis.LowerCaseFilter.incrementToken(LowerCaseFilter.java:44)
at
org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:788)
at
org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:430)
at
org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:394)
at
org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:251)
at
org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:494)
at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1609)
... 59 more

Kind Regards,
Furkan KAMACI

Re: Solr OOM when Indexing Small File at Large Index

Posted by Jan Høydahl <ja...@cominvent.com>.
From the log I can see you run ExtractingHandler on Solr side and it failes while parsing a Word document.
I also see that you analysis uses a PatternReplaceCharFilter, which seems to the the one tripping the error?

> *Caused by: java.lang.OutOfMemoryError: Java heap space*
> at java.lang.StringBuilder.ensureCapacityImpl(StringBuilder.java:802)
> at java.lang.StringBuilder.append(StringBuilder.java:285)
> at
> org.apache.lucene.analysis.pattern.PatternReplaceCharFilter.fill(PatternReplaceCharFilter.java:75)
> at
> org.apache.lucene.analysis.pattern.PatternReplaceCharFilter.read(PatternReplaceCharFilter.java:65)
> at
> org.apache.lucene.analysis.standard.ClassicTokenizerImpl.zzRefill(ClassicTokenizerImpl.java:442)

You should probably share more of your configuration details for a better understanding of the app.

Is the OOM reproducible with the same document? Then you'll probably be able to pinpoint what in the parsing of that document consumes so much RAM.

If I were you I'd start planning for movig Tika parsing of binary full-text docs out from Solr and to a client-side app to gain more control over that part of the app.

Jan


> 5. apr. 2021 kl. 10:53 skrev Furkan KAMACI <fu...@gmail.com>:
> 
> I have Solr instance which is not SolrCloud and has a version of 7.6.0 with
> a large index > 500GB
> 
> Config is as follows:
> 
> solr.autoCommit.maxTime=10000
> solr.autoSoftCommit.maxTime=30000
> 
> Memory on the machine is 40 GiB which is fully consumed by other processes
> too.
> Solr runs with 24 GB memory.
> 
> Solr runs as a docker container.
> 
> I get an IndexWriter AlreadyClosedException and OOM when indexing a ~KB
> size file.
> 
> What can be the reason:
> 
> *org.apache.solr.common.SolrException: Server error writing document id *
> file://///10.10.10.10/C/abc.docx to the index
> at
> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:240)
> at
> org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:67)
> at
> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
> at
> org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:961)
> at
> org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1177)
> at
> org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:653)
> at
> org.apache.solr.update.processor.LogUpdateProcessorFactory$LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:103)
> at
> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
> at
> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
> at
> org.apache.solr.update.processor.LanguageIdentifierUpdateProcessor.processAdd(LanguageIdentifierUpdateProcessor.java:206)
> at
> org.apache.solr.update.processor.SignatureUpdateProcessorFactory$SignatureUpdateProcessor.processAdd(SignatureUpdateProcessorFactory.java:196)
> at
> org.apache.solr.handler.extraction.ExtractingDocumentLoader.doAdd(ExtractingDocumentLoader.java:126)
> at
> org.apache.solr.handler.extraction.ExtractingDocumentLoader.addDoc(ExtractingDocumentLoader.java:131)
> at
> org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:237)
> at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
> at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:199)
> at org.apache.solr.core.SolrCore.execute(SolrCore.java:2541)
> at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:709)
> at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:515)
> at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:377)
> at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:323)
> at
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1634)
> at
> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
> at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:146)
> at
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
> at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
> at
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:257)
> at
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
> at
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:255)
> at
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1317)
> at
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)
> at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
> at
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
> at
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)
> at
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1219)
> at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)
> at
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
> at
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
> at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
> at
> org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
> at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
> at org.eclipse.jetty.server.Server.handle(Server.java:531)
> at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:352)
> at
> org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:260)
> at
> org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:281)
> at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
> at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:118)
> at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333)
> at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310)
> at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168)
> at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126)
> at
> org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366)
> at
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:762)
> at
> org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:680)
> at java.lang.Thread.run(Thread.java:823)
> *Caused by: org.apache.lucene.store.AlreadyClosedException: this
> IndexWriter is closed*
> at org.apache.lucene.index.IndexWriter.ensureOpen(IndexWriter.java:679)
> at org.apache.lucene.index.IndexWriter.ensureOpen(IndexWriter.java:693)
> at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1606)
> at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1601)
> at
> org.apache.solr.update.DirectUpdateHandler2.updateDocOrDocValues(DirectUpdateHandler2.java:964)
> at
> org.apache.solr.update.DirectUpdateHandler2.doNormalUpdate(DirectUpdateHandler2.java:341)
> at
> org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:288)
> at
> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:235)
> ... 54 more
> *Caused by: java.lang.OutOfMemoryError: Java heap space*
> at java.lang.StringBuilder.ensureCapacityImpl(StringBuilder.java:802)
> at java.lang.StringBuilder.append(StringBuilder.java:285)
> at
> org.apache.lucene.analysis.pattern.PatternReplaceCharFilter.fill(PatternReplaceCharFilter.java:75)
> at
> org.apache.lucene.analysis.pattern.PatternReplaceCharFilter.read(PatternReplaceCharFilter.java:65)
> at
> org.apache.lucene.analysis.standard.ClassicTokenizerImpl.zzRefill(ClassicTokenizerImpl.java:442)
> at
> org.apache.lucene.analysis.standard.ClassicTokenizerImpl.getNextToken(ClassicTokenizerImpl.java:652)
> at
> org.apache.lucene.analysis.standard.ClassicTokenizer.incrementToken(ClassicTokenizer.java:136)
> at
> org.apache.lucene.analysis.FilteringTokenFilter.incrementToken(FilteringTokenFilter.java:49)
> at
> org.apache.lucene.analysis.pattern.PatternReplaceFilter.incrementToken(PatternReplaceFilter.java:68)
> at
> org.apache.lucene.analysis.pattern.PatternReplaceFilter.incrementToken(PatternReplaceFilter.java:68)
> at
> org.apache.lucene.analysis.pattern.PatternReplaceFilter.incrementToken(PatternReplaceFilter.java:68)
> at
> org.apache.lucene.analysis.FilteringTokenFilter.incrementToken(FilteringTokenFilter.java:49)
> at
> org.apache.lucene.analysis.LowerCaseFilter.incrementToken(LowerCaseFilter.java:44)
> at
> org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:788)
> at
> org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:430)
> at
> org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:394)
> at
> org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:251)
> at
> org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:494)
> at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1609)
> ... 59 more
> 
> Kind Regards,
> Furkan KAMACI