You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by Bisonti Mario <Ma...@vimar.com> on 2018/07/27 09:36:25 UTC

java.lang.OutOfMemoryError indexing xlsm and xlsx file

Hello,
I get the error below when indexing a .xlsm or .xlsx file of 11 MB.

What can I do to fix this?

Thanks a lot
Mario

2018-07-27 11:08:25.634 WARN  (qtp1521083627-99) [   x:core_share] o.e.j.s.HttpChannel /solr/core_share/update/extract
java.lang.OutOfMemoryError
        at java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)
        at java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)
        at java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)
        at java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)
        at java.base/java.lang.StringBuilder.append(StringBuilder.java:195)
        at org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
        at org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
        at org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
        at org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)
        at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
        at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
        at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
        at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
        at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
        at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
        at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
        at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
        at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
        at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.Server.handle(Server.java:530)
        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
        at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
        at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
        at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
        at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
        at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
        at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
        at java.base/java.lang.Thread.run(Thread.java:844)
2018-07-27 11:09:00.466 INFO  (qtp1521083627-93) [   x:core_share] o.a.s.u.DirectUpdateHandler2 start commit{_version_=1607134127551676416,optimize=false,openSearcher=true,waitSearcher=true,expungeDeletes=false,softCommit=false,prepareCommit=false}
2018-07-27 11:09:00.466 INFO  (qtp1521083627-93) [   x:core_share] o.a.s.u.DirectUpdateHandler2 No uncommitted changes. Skipping IW.commit.
2018-07-27 11:09:00.466 INFO  (qtp1521083627-93) [   x:core_share] o.a.s.u.DirectUpdateHandler2 end_commit_flush
2018-07-27 11:09:00.466 INFO  (qtp1521083627-93) [   x:core_share] o.a.s.u.p.LogUpdateProcessorFactory [core_share]  webapp=/solr path=/update/extract params={commit=true&wt=xml&version=2.2}{commit=} 0 1



Re: java.lang.OutOfMemoryError indexing xlsm and xlsx file

Posted by Andrea Gazzarini <a....@sease.io>.
Hi Mario, could you please share your settings (e.g. OS, JVM memory, 
System memory)?

Andrea

On 27/07/18 11:36, Bisonti Mario wrote:
> Hallo
> I obtain the error indexing a .xlsm or .xlsx file of 11 MB
>
> What could I do?
>
> Thanks a lot
> Mario
>
> 2018-07-27 11:08:25.634 WARN  (qtp1521083627-99) [   x:core_share] o.e.j.s.HttpChannel /solr/core_share/update/extract
> java.lang.OutOfMemoryError
>          at java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)
>          at java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)
>          at java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)
>          at java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)
>          at java.base/java.lang.StringBuilder.append(StringBuilder.java:195)
>          at org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)
>          at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>          at org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)
>          at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>          at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>          at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>          at org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
>          at org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
>          at org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
>          at org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
>          at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
>          at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
>          at org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)
>          at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)
>          at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)
>          at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
>          at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
>          at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
>          at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)
>          at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)
>          at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)
>          at java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)
>          at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)
>          at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)
>          at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)
>          at java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
>          at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
>          at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)
>          at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)
>          at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)
>          at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)
>          at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)
>          at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)
>          at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
>          at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)
>          at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)
>          at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
>          at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>          at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>          at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
>          at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
>          at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
>          at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
>          at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
>          at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
>          at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
>          at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
>          at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
>          at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
>          at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
>          at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
>          at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
>          at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>          at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
>          at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
>          at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
>          at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
>          at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
>          at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
>          at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
>          at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
>          at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
>          at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
>          at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
>          at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
>          at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>          at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
>          at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>          at org.eclipse.jetty.server.Server.handle(Server.java:530)
>          at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
>          at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
>          at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
>          at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
>          at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
>          at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
>          at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
>          at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
>          at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
>          at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
>          at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
>          at java.base/java.lang.Thread.run(Thread.java:844)
> 2018-07-27 11:09:00.466 INFO  (qtp1521083627-93) [   x:core_share] o.a.s.u.DirectUpdateHandler2 start commit{_version_=1607134127551676416,optimize=false,openSearcher=true,waitSearcher=true,expungeDeletes=false,softCommit=false,prepareCommit=false}
> 2018-07-27 11:09:00.466 INFO  (qtp1521083627-93) [   x:core_share] o.a.s.u.DirectUpdateHandler2 No uncommitted changes. Skipping IW.commit.
> 2018-07-27 11:09:00.466 INFO  (qtp1521083627-93) [   x:core_share] o.a.s.u.DirectUpdateHandler2 end_commit_flush
> 2018-07-27 11:09:00.466 INFO  (qtp1521083627-93) [   x:core_share] o.a.s.u.p.LogUpdateProcessorFactory [core_share]  webapp=/solr path=/update/extract params={commit=true&wt=xml&version=2.2}{commit=} 0 1
>
>
>