You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@manifoldcf.apache.org by Bisonti Mario <Ma...@vimar.com> on 2018/07/27 07:04:27 UTC

Job stuck internal http error 500

Hallo.
My job gets stuck while indexing an xlsx file of 38MB

What could I do to solve my problem?

In the following there is the error:
2018-07-27 08:55:15.562 WARN  (qtp1521083627-52) [   x:core_share] o.e.j.s.HttpChannel /solr/core_share/update/extract
java.lang.OutOfMemoryError
        at java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)
        at java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)
        at java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)
        at java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)
        at java.base/java.lang.StringBuilder.append(StringBuilder.java:195)
        at org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
        at org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
        at org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
        at org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)
        at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
        at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
        at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
        at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
        at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
        at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
        at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
        at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
        at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
        at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.Server.handle(Server.java:530)
        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
        at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
        at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
        at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
        at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
        at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
        at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
        at java.base/java.lang.Thread.run(Thread.java:844)



Re: Job stuck internal http error 500

Posted by Karl Wright <da...@gmail.com>.
Although it is not clear which process you are talking about.  If it is Solr,
ask them.

Karl

On Fri, Jul 27, 2018, 5:36 AM Karl Wright <da...@gmail.com> wrote:

> I am presuming you are using the examples.  If so, edit the options file
> to grant more memory to your agents process by increasing the Xmx value.
>
> Karl
>
> On Fri, Jul 27, 2018, 3:04 AM Bisonti Mario <Ma...@vimar.com>
> wrote:
>
>> Hallo.
>>
>> My job gets stuck while indexing an xlsx file of 38MB
>>
>>
>>
>> What could I do to solve my problem?
>>
>>
>>
>> In the following there is the error:
>> 2018-07-27 08:55:15.562 WARN  (qtp1521083627-52) [   x:core_share]
>> o.e.j.s.HttpChannel /solr/core_share/update/extract
>>
>> java.lang.OutOfMemoryError
>>
>>         at
>> java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)
>>
>>         at
>> java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)
>>
>>         at
>> java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)
>>
>>         at
>> java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)
>>
>>         at
>> java.base/java.lang.StringBuilder.append(StringBuilder.java:195)
>>
>>         at
>> org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)
>>
>>         at
>> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>>
>>         at
>> org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)
>>
>>         at
>> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>>
>>         at
>> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>>
>>         at
>> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>>
>>         at
>> org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
>>
>>         at
>> org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
>>
>>         at
>> org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
>>
>>         at
>> org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
>>
>>         at
>> org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
>>
>>         at
>> org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
>>
>>         at
>> org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)
>>
>>         at
>> org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)
>>
>>         at
>> org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)
>>
>>         at
>> org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
>>
>>         at
>> org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)
>>
>>         at
>> java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)
>>
>>         at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)
>>
>>         at
>> org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)
>>
>>         at
>> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)
>>
>>         at
>> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)
>>
>>         at
>> org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
>>
>>         at
>> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)
>>
>>         at
>> org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)
>>
>>         at
>> org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
>>
>>         at
>> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>>
>>         at
>> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>>
>>         at
>> org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
>>
>>         at
>> org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
>>
>>         at
>> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
>>
>>         at
>> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
>>
>>         at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
>>
>>         at
>> org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
>>
>>         at
>> org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
>>
>>         at
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
>>
>>         at
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
>>
>>         at
>> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
>>
>>         at
>> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
>>
>>         at
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
>>
>>         at
>> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
>>
>>         at
>> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>>
>>         at
>> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
>>
>>         at
>> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
>>
>>         at
>> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
>>
>>         at
>> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
>>
>>         at
>> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
>>
>>         at
>> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
>>
>>         at
>> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
>>
>>         at
>> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
>>
>>         at
>> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
>>
>>         at
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
>>
>>         at
>> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
>>
>>         at
>> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
>>
>>         at
>> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>>
>>         at
>> org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
>>
>>         at
>> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>>
>>         at org.eclipse.jetty.server.Server.handle(Server.java:530)
>>
>>         at
>> org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
>>
>>         at
>> org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
>>
>>         at
>> org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
>>
>>         at
>> org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
>>
>>         at
>> org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
>>
>>         at
>> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
>>
>>         at
>> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
>>
>>         at
>> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
>>
>>         at
>> org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
>>
>>         at
>> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
>>
>>         at
>> org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
>>
>>         at java.base/java.lang.Thread.run(Thread.java:844)
>>
>>
>>
>>
>>
>

R: Job stuck internal http error 500

Posted by Bisonti Mario <Ma...@vimar.com>.
I replaced all four of my Tika 1.17 .jar files (parsers, core, java7, xmp) with the 1.19 nightly-build versions, and it works!
No more 500 error and the file has been indexed!

From the link:
https://builds.apache.org/job/tika-branch-1x/73/
you can use the subfolder:
Apache Tika core
Apache Tika Java-7 Components
Apache Tika parsers
Apache Tika XMP

I downloaded the:
tika-xmp-1.19-20180807.184545-61.jar
tika-core-1.19-20180807.184018-61.jar
tika-parsers-1.19-20180807.184508-61.jar
tika-java7-1.19-20180807.185414-60.jar
and I renamed them to:
-rw-r--r-- 1 root root  687651 Aug  8 14:16 tika-core-1.19.jar
-rw-r--r-- 1 root root   14012 Aug  8 14:16 tika-java7-1.19.jar
-rw-r--r-- 1 root root 1131862 Aug  8 14:16 tika-parsers-1.19.jar
-rw-r--r-- 1 root root   34447 Aug  8 14:16 tika-xmp-1.19.jar

So, in my /opt/solr-7.3.1/contrib/extraction/lib directory of solr I have:
-rw-r--r-- 1 root root  663109 Dec  9  2017 tika-core-1.17.jarOLD
-rw-r--r-- 1 root root  687651 Aug  8 14:16 tika-core-1.19.jar
-rw-r--r-- 1 root root   13268 Dec  9  2017 tika-java7-1.17.jarOLD
-rw-r--r-- 1 root root   14012 Aug  8 14:16 tika-java7-1.19.jar
-rw-r--r-- 1 root root 1078626 Dec  9  2017 tika-parsers-1.17.jarOO
-rw-r--r-- 1 root root 1131862 Aug  8 14:16 tika-parsers-1.19.jar
-rw-r--r-- 1 root root   33705 Dec  9  2017 tika-xmp-1.17.jarOLD
-rw-r--r-- 1 root root   34447 Aug  8 14:16 tika-xmp-1.19.jar

You have to restart solr to use the new tika version

The Tika 1.19 version will be released in the next few weeks.

Here is the link about my issue:

https://issues.apache.org/jira/browse/TIKA-2703?page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel&focusedCommentId=16573125#comment-16573125


Mario



Da: Karl Wright <da...@gmail.com>
Inviato: mercoledì 8 agosto 2018 14:54
A: user@manifoldcf.apache.org
Oggetto: Re: Job stuck internal http error 500

Thanks for the update!

Did the Tika people say when 1.19 will be released?

Karl


On Wed, Aug 8, 2018 at 8:29 AM Bisonti Mario <Ma...@vimar.com>> wrote:
Hallo
You were right, Karl.

I have been helped by the tika people and they patched the tika jar of the solr installation and the problem was solved!

Now I have solved it using the Tika 1.19 nightly build.


Thanks a lot.



Da: Karl Wright <da...@gmail.com>>
Inviato: venerdì 27 luglio 2018 12:39
A: user@manifoldcf.apache.org<ma...@manifoldcf.apache.org>
Oggetto: Re: Job stuck internal http error 500

I am afraid you will need to open a Tika ticket, and be prepared to attach your file to it.

Thanks,

Karl


On Fri, Jul 27, 2018 at 6:04 AM Bisonti Mario <Ma...@vimar.com>> wrote:
It isn’t a memory problem, because bigger xls files (30MB) have been processed.

This xlsm file, with many colors etc., hangs.
I suppose that it is a Tika/Solr error, but I don’t know how to solve it
☹

Oggetto: R: Job stuck internal http error 500

Yes, I am using:
/opt/manifoldcf/multiprocess-file-example-proprietary
I set:
sudo nano options.env.unix
-Xms2048m
-Xmx2048m

But I obtain the same error.
My suspicion is that it could be a Solr/Tika problem.
What could I do?
I restricted the scan to a single file and I still get the same error.



Da: Karl Wright <da...@gmail.com>>
Inviato: venerdì 27 luglio 2018 11:36
A: user@manifoldcf.apache.org<ma...@manifoldcf.apache.org>
Oggetto: Re: Job stuck internal http error 500

I am presuming you are using the examples.  If so, edit the options file to grant more memory to your agents process by increasing the Xmx value.

Karl

On Fri, Jul 27, 2018, 3:04 AM Bisonti Mario <Ma...@vimar.com>> wrote:
Hallo.
My job gets stuck while indexing an xlsx file of 38MB

What could I do to solve my problem?

In the following there is the error:
2018-07-27 08:55:15.562 WARN  (qtp1521083627-52) [   x:core_share] o.e.j.s.HttpChannel /solr/core_share/update/extract
java.lang.OutOfMemoryError
        at java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)
        at java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)
        at java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)
        at java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)
        at java.base/java.lang.StringBuilder.append(StringBuilder.java:195)
        at org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
        at org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
        at org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
        at org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)
        at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
        at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
        at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
        at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
        at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
        at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
        at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
        at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
        at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
        at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.Server.handle(Server.java:530)
        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
        at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
        at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
        at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
        at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
        at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
        at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
        at java.base/java.lang.Thread.run(Thread.java:844)



Re: Job stuck internal http error 500

Posted by Karl Wright <da...@gmail.com>.
Thanks for the update!

Did the Tika people say when 1.19 will be released?

Karl


On Wed, Aug 8, 2018 at 8:29 AM Bisonti Mario <Ma...@vimar.com>
wrote:

> Hallo
>
> You were right, Karl.
>
>
>
> I have been helped by the tika people and they patched the tika jar of the
> solr installation and the problem was solved!
>
>
>
> Now I solved it using the tika 1.19 nightly build.
>
>
>
>
>
> Thanks a lot.
>
>
>
>
>
>
>
> *Da:* Karl Wright <da...@gmail.com>
> *Inviato:* venerdì 27 luglio 2018 12:39
> *A:* user@manifoldcf.apache.org
> *Oggetto:* Re: Job stuck internal http error 500
>
>
>
> I am afraid you will need to open a Tika ticket, and be prepared to attach
> your file to it.
>
>
>
> Thanks,
>
>
>
> Karl
>
>
>
>
>
> On Fri, Jul 27, 2018 at 6:04 AM Bisonti Mario <Ma...@vimar.com>
> wrote:
>
> It isn’t a memory problem, because bigger xls files (30MB) have been
> processed.
>
>
>
> This xlsm file, with many colors etc., hangs
>
> I suppose that it is a tika/solr error, but I don’t know how to solve
> it
>
> ☹
>
>
>
> *Oggetto:* R: Job stuck internal http error 500
>
>
>
> Yes, I am using:
> /opt/manifoldcf/multiprocess-file-example-proprietary
> I set:
>
> sudo nano options.env.unix
>
> -Xms2048m
>
> -Xmx2048m
>
>
>
> But I obtain the same error.
>
> My doubt is that it could be a solr/tika problem.
>
> What could I do?
>
> I restrict the scan to a single file and I obtain the same error
>
>
>
>
>
>
>
> *Da:* Karl Wright <da...@gmail.com>
> *Inviato:* venerdì 27 luglio 2018 11:36
> *A:* user@manifoldcf.apache.org
> *Oggetto:* Re: Job stuck internal http error 500
>
>
>
> I am presuming you are using the examples.  If so, edit the options file
> to grant more memory to your agents process by increasing the Xmx value.
>
>
>
> Karl
>
>
>
> On Fri, Jul 27, 2018, 3:04 AM Bisonti Mario <Ma...@vimar.com>
> wrote:
>
> Hallo.
>
> My job gets stuck indexing an xlsx file of 38MB
>
>
>
> What could I do to solve my problem?
>
>
>
> In the following there is the error:
> 2018-07-27 08:55:15.562 WARN  (qtp1521083627-52) [   x:core_share]
> o.e.j.s.HttpChannel /solr/core_share/update/extract
>
> java.lang.OutOfMemoryError
>
>         at
> java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)
>
>         at
> java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)
>
>         at
> java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)
>
>         at
> java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)
>
>         at java.base/java.lang.StringBuilder.append(StringBuilder.java:195)
>
>         at
> org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
>
>         at
> org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
>
>         at
> org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
>
>         at
> org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
>
>         at
> org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
>
>         at
> org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)
>
>         at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
>
>         at
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>
>         at
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>
>         at
> org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
>
>         at
> org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
>
>         at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
>
>         at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
>
>         at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
>
>         at
> org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
>
>         at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
>
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
>
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
>
>         at
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
>
>         at
> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
>
>         at
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
>
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
>
>         at
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
>
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
>
>         at
> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
>
>         at
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
>
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
>
>         at
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
>
>         at
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
>
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>
>         at
> org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
>
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>
>         at org.eclipse.jetty.server.Server.handle(Server.java:530)
>
>         at
> org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
>
>         at
> org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
>
>         at
> org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
>
>         at
> org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
>
>         at
> org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
>
>         at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
>
>         at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
>
>         at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
>
>         at
> org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
>
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
>
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
>
>         at java.base/java.lang.Thread.run(Thread.java:844)
>
>
>
>
>
>

RE: Job stuck internal http error 500

Posted by msaunier <ms...@citya.com>.
Hello Mario,

 

I’m interested in this solution. Do you have a link to download the Tika 1.19 bin jar?

 

You said : « patched the tika jar of the solr installation »

So, you have changed the tika jar on the Solr instance, not on the ManifoldCF instance?

 

Thanks.

 

 

De : Bisonti Mario [mailto:Mario.Bisonti@vimar.com] 
Envoyé : mercredi 8 août 2018 14:29
À : user@manifoldcf.apache.org
Objet : R: Job stuck internal http error 500

 

Hallo

You were right, Karl.

 

I have been helped by the tika people and they patched the tika jar of the solr installation and the problem was solved!

 

Now I solved it using the tika 1.19 nightly build.

 

 

Thanks a lot.

 

 

 

Da: Karl Wright <daddywri@gmail.com <ma...@gmail.com> > 
Inviato: venerdì 27 luglio 2018 12:39
A: user@manifoldcf.apache.org <ma...@manifoldcf.apache.org> 
Oggetto: Re: Job stuck internal http error 500

 

I am afraid you will need to open a Tika ticket, and be prepared to attach your file to it.

 

Thanks,

 

Karl

 

 

On Fri, Jul 27, 2018 at 6:04 AM Bisonti Mario <Mario.Bisonti@vimar.com <ma...@vimar.com> > wrote:

It isn’t a memory problem, because bigger xls files (30MB) have been processed.

 

This xlsm file, with many colors etc., hangs

I suppose that it is a tika/solr error, but I don’t know how to solve it

☹

 

Oggetto: R: Job stuck internal http error 500

 

Yes, I am using:
/opt/manifoldcf/multiprocess-file-example-proprietary
I set:

sudo nano options.env.unix

-Xms2048m

-Xmx2048m

 

But I obtain the same error.

My doubt is that it could be a solr/tika problem.

What could I do?

I restrict the scan to a single file and I obtain the same error

 

 

 

Da: Karl Wright <daddywri@gmail.com <ma...@gmail.com> > 
Inviato: venerdì 27 luglio 2018 11:36
A: user@manifoldcf.apache.org <ma...@manifoldcf.apache.org> 
Oggetto: Re: Job stuck internal http error 500

 

I am presuming you are using the examples.  If so, edit the options file to grant more memory to your agents process by increasing the Xmx value.

 

Karl

 

On Fri, Jul 27, 2018, 3:04 AM Bisonti Mario <Mario.Bisonti@vimar.com <ma...@vimar.com> > wrote:

Hallo.

My job gets stuck indexing an xlsx file of 38MB

 

What could I do to solve my problem?

 

In the following there is the error:
2018-07-27 08:55:15.562 WARN  (qtp1521083627-52) [   x:core_share] o.e.j.s.HttpChannel /solr/core_share/update/extract

java.lang.OutOfMemoryError

        at java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)

        at java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)

        at java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)

        at java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)

        at java.base/java.lang.StringBuilder.append(StringBuilder.java:195)

        at org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)

        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)

        at org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)

        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)

        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)

        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)

        at org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)

        at org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)

        at org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)

        at org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)

        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)

        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)

        at org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)

        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)

        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)

        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)

        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)

        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)

        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)

        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)

        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)

        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)

        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)

        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)

        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)

        at java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)

        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)

        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)

        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)

        at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)

        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)

        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)

        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)

        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)

        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)

        at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)

        at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)

        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

        at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)

        at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)

        at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)

        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)

        at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)

        at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)

        at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)

        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)

        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)

        at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)

        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)

        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)

        at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)

        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)

        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)

        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)

        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)

        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)

        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)

        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)

        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)

        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)

        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)

        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)

        at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)

        at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)

        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)

        at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)

        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)

        at org.eclipse.jetty.server.Server.handle(Server.java:530)

        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)

        at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)

        at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)

        at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)

        at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)

        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)

        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)

        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)

        at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)

        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)

        at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)

        at java.base/java.lang.Thread.run(Thread.java:844)

 

 


R: Job stuck internal http error 500

Posted by Bisonti Mario <Ma...@vimar.com>.
Hallo
You were right, Karl.

I have been helped by the tika people and they patched the tika jar of the solr installation and the problem was solved!

Now I solved it using the tika 1.19 nightly build.


Thanks a lot.



Da: Karl Wright <da...@gmail.com>
Inviato: venerdì 27 luglio 2018 12:39
A: user@manifoldcf.apache.org
Oggetto: Re: Job stuck internal http error 500

I am afraid you will need to open a Tika ticket, and be prepared to attach your file to it.

Thanks,

Karl


On Fri, Jul 27, 2018 at 6:04 AM Bisonti Mario <Ma...@vimar.com>> wrote:
It isn’t a memory problem, because bigger xls files (30MB) have been processed.

This xlsm file, with many colors etc., hangs
I suppose that it is a tika/solr error, but I don’t know how to solve it
☹

Oggetto: R: Job stuck internal http error 500

Yes, I am using:
/opt/manifoldcf/multiprocess-file-example-proprietary
I set:
sudo nano options.env.unix
-Xms2048m
-Xmx2048m

But I obtain the same error.
My doubt is that it could be a solr/tika problem.
What could I do?
I restrict the scan to a single file and I obtain the same error



Da: Karl Wright <da...@gmail.com>>
Inviato: venerdì 27 luglio 2018 11:36
A: user@manifoldcf.apache.org<ma...@manifoldcf.apache.org>
Oggetto: Re: Job stuck internal http error 500

I am presuming you are using the examples.  If so, edit the options file to grant more memory to your agents process by increasing the Xmx value.

Karl

On Fri, Jul 27, 2018, 3:04 AM Bisonti Mario <Ma...@vimar.com>> wrote:
Hallo.
My job gets stuck indexing an xlsx file of 38MB

What could I do to solve my problem?

In the following there is the error:
2018-07-27 08:55:15.562 WARN  (qtp1521083627-52) [   x:core_share] o.e.j.s.HttpChannel /solr/core_share/update/extract
java.lang.OutOfMemoryError
        at java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)
        at java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)
        at java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)
        at java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)
        at java.base/java.lang.StringBuilder.append(StringBuilder.java:195)
        at org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
        at org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
        at org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
        at org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)
        at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
        at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
        at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
        at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
        at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
        at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
        at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
        at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
        at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
        at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.Server.handle(Server.java:530)
        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
        at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
        at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
        at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
        at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
        at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
        at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
        at java.base/java.lang.Thread.run(Thread.java:844)



Re: Job stuck internal http error 500

Posted by Karl Wright <da...@gmail.com>.
I am afraid you will need to open a Tika ticket, and be prepared to attach
your file to it.

Thanks,

Karl


On Fri, Jul 27, 2018 at 6:04 AM Bisonti Mario <Ma...@vimar.com>
wrote:

> It isn’t a memory problem, because bigger xls files (30MB) have been
> processed.
>
>
>
> This xlsm file, with many colors etc., hangs.
>
> I suppose that it is a tika/solr error, but I don’t know how to solve
> it
>
> ☹
>
>
>
> *Oggetto:* R: Job stuck internal http error 500
>
>
>
> Yes, I am using:
> /opt/manifoldcf/multiprocess-file-example-proprietary
> I set:
>
> sudo nano options.env.unix
>
> -Xms2048m
>
> -Xmx2048m
>
>
>
> But I obtain the same error.
>
> My suspicion is that it could be a solr/tika problem.
>
> What could I do?
>
> I restricted the scan to a single file and I obtained the same error
>
>
>
>
>
>
>
> *Da:* Karl Wright <da...@gmail.com>
> *Inviato:* venerdì 27 luglio 2018 11:36
> *A:* user@manifoldcf.apache.org
> *Oggetto:* Re: Job stuck internal http error 500
>
>
>
> I am presuming you are using the examples.  If so, edit the options file
> to grant more memory to your agents process by increasing the Xmx value.
>
>
>
> Karl
>
>
>
> On Fri, Jul 27, 2018, 3:04 AM Bisonti Mario <Ma...@vimar.com>
> wrote:
>
> Hallo.
>
> My job gets stuck indexing an xlsx file of 38MB
>
>
>
> What could I do to solve my problem?
>
>
>
> In the following there is the error:
> 2018-07-27 08:55:15.562 WARN  (qtp1521083627-52) [   x:core_share]
> o.e.j.s.HttpChannel /solr/core_share/update/extract
>
> java.lang.OutOfMemoryError
>
>         at
> java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)
>
>         at
> java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)
>
>         at
> java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)
>
>         at
> java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)
>
>         at java.base/java.lang.StringBuilder.append(StringBuilder.java:195)
>
>         at
> org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
>
>         at
> org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
>
>         at
> org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
>
>         at
> org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
>
>         at
> org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
>
>         at
> org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)
>
>         at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
>
>         at
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>
>         at
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>
>         at
> org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
>
>         at
> org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
>
>         at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
>
>         at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
>
>         at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
>
>         at
> org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
>
>         at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
>
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
>
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
>
>         at
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
>
>         at
> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
>
>         at
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
>
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
>
>         at
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
>
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
>
>         at
> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
>
>         at
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
>
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
>
>         at
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
>
>         at
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
>
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>
>         at
> org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
>
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>
>         at org.eclipse.jetty.server.Server.handle(Server.java:530)
>
>         at
> org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
>
>         at
> org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
>
>         at
> org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
>
>         at
> org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
>
>         at
> org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
>
>         at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
>
>         at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
>
>         at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
>
>         at
> org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
>
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
>
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
>
>         at java.base/java.lang.Thread.run(Thread.java:844)
>
>
>
>
>
>

R: Job stuck internal http error 500

Posted by Bisonti Mario <Ma...@vimar.com>.
It isn’t a memory problem, because bigger xls files (30MB) have been processed.

This xlsm file, with many colors etc., hangs.
I suppose that it is a tika/solr error, but I don’t know how to solve it
☹

Oggetto: R: Job stuck internal http error 500

Yes, I am using:
/opt/manifoldcf/multiprocess-file-example-proprietary
I set:
sudo nano options.env.unix
-Xms2048m
-Xmx2048m

But I obtain the same error.
My suspicion is that it could be a solr/tika problem.
What could I do?
I restricted the scan to a single file and I obtained the same error



Da: Karl Wright <da...@gmail.com>
Inviato: venerdì 27 luglio 2018 11:36
A: user@manifoldcf.apache.org
Oggetto: Re: Job stuck internal http error 500

I am presuming you are using the examples.  If so, edit the options file to grant more memory to your agents process by increasing the Xmx value.

Karl

On Fri, Jul 27, 2018, 3:04 AM Bisonti Mario <Ma...@vimar.com> wrote:
Hallo.
My job gets stuck indexing an xlsx file of 38MB

What could I do to solve my problem?

In the following there is the error:
2018-07-27 08:55:15.562 WARN  (qtp1521083627-52) [   x:core_share] o.e.j.s.HttpChannel /solr/core_share/update/extract
java.lang.OutOfMemoryError
        at java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)
        at java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)
        at java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)
        at java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)
        at java.base/java.lang.StringBuilder.append(StringBuilder.java:195)
        at org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
        at org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
        at org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
        at org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)
        at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
        at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
        at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
        at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
        at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
        at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
        at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
        at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
        at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
        at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.Server.handle(Server.java:530)
        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
        at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
        at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
        at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
        at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
        at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
        at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
        at java.base/java.lang.Thread.run(Thread.java:844)



R: Job stuck internal http error 500

Posted by Bisonti Mario <Ma...@vimar.com>.
Yes, I am using:
/opt/manifoldcf/multiprocess-file-example-proprietary
I set:
sudo nano options.env.unix
-Xms2048m
-Xmx2048m

But I obtain the same error.
My suspicion is that it could be a solr/tika problem.
What could I do?
I restricted the scan to a single file and I obtained the same error



Da: Karl Wright <da...@gmail.com>
Inviato: venerdì 27 luglio 2018 11:36
A: user@manifoldcf.apache.org
Oggetto: Re: Job stuck internal http error 500

I am presuming you are using the examples.  If so, edit the options file to grant more memory to your agents process by increasing the Xmx value.

Karl

On Fri, Jul 27, 2018, 3:04 AM Bisonti Mario <Ma...@vimar.com> wrote:
Hallo.
My job gets stuck indexing an xlsx file of 38MB

What could I do to solve my problem?

In the following there is the error:
2018-07-27 08:55:15.562 WARN  (qtp1521083627-52) [   x:core_share] o.e.j.s.HttpChannel /solr/core_share/update/extract
java.lang.OutOfMemoryError
        at java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)
        at java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)
        at java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)
        at java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)
        at java.base/java.lang.StringBuilder.append(StringBuilder.java:195)
        at org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
        at org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
        at org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
        at org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
        at org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
        at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)
        at java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
        at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)
        at java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)
        at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)
        at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
        at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)
        at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
        at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
        at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
        at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
        at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
        at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
        at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
        at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
        at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
        at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
        at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
        at org.eclipse.jetty.server.Server.handle(Server.java:530)
        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
        at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
        at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
        at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
        at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
        at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
        at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
        at java.base/java.lang.Thread.run(Thread.java:844)



Re: Job stuck internal http error 500

Posted by Karl Wright <da...@gmail.com>.
I am presuming you are using the examples.  If so, edit the options file to
grant more memory to your agents process by increasing the Xmx value.

Karl

On Fri, Jul 27, 2018, 3:04 AM Bisonti Mario <Ma...@vimar.com> wrote:

> Hallo.
>
> My job gets stuck indexing an xlsx file of 38MB
>
>
>
> What could I do to solve my problem?
>
>
>
> In the following there is the error:
> 2018-07-27 08:55:15.562 WARN  (qtp1521083627-52) [   x:core_share]
> o.e.j.s.HttpChannel /solr/core_share/update/extract
>
> java.lang.OutOfMemoryError
>
>         at
> java.base/java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:188)
>
>         at
> java.base/java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:180)
>
>         at
> java.base/java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:147)
>
>         at
> java.base/java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:660)
>
>         at java.base/java.lang.StringBuilder.append(StringBuilder.java:195)
>
>         at
> org.apache.solr.handler.extraction.SolrContentHandler.characters(SolrContentHandler.java:302)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:270)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:146)
>
>         at
> org.apache.tika.sax.SafeContentHandler.access$001(SafeContentHandler.java:46)
>
>         at
> org.apache.tika.sax.SafeContentHandler$1.write(SafeContentHandler.java:82)
>
>         at
> org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:140)
>
>         at
> org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:287)
>
>         at
> org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:279)
>
>         at
> org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:306)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLTikaBodyPartHandler.run(OOXMLTikaBodyPartHandler.java:147)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.handleEndOfRun(OOXMLWordAndPowerPointTextHandler.java:468)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler.endElement(OOXMLWordAndPowerPointTextHandler.java:450)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
>
>         at
> org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1714)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2879)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:112)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:532)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:888)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:824)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:635)
>
>         at
> java.xml/com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:324)
>
>         at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleGeneralTextContainingPart(AbstractOOXMLExtractor.java:506)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.processShapes(XSSFExcelExtractorDecorator.java:279)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.buildXHTML(XSSFExcelExtractorDecorator.java:185)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.getXHTML(XSSFExcelExtractorDecorator.java:120)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:143)
>
>         at
> org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
>
>         at
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>
>         at
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
>
>         at
> org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
>
>         at
> org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
>
>         at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
>
>         at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
>
>         at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
>
>         at
> org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
>
>         at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
>
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
>
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
>
>         at
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
>
>         at
> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
>
>         at
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
>
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
>
>         at
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
>
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
>
>         at
> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
>
>         at
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
>
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
>
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
>
>         at
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
>
>         at
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
>
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>
>         at
> org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
>
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
>
>         at org.eclipse.jetty.server.Server.handle(Server.java:530)
>
>         at
> org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
>
>         at
> org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
>
>         at
> org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
>
>         at
> org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
>
>         at
> org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
>
>         at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
>
>         at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
>
>         at
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
>
>         at
> org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
>
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
>
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
>
>         at java.base/java.lang.Thread.run(Thread.java:844)
>
>
>
>
>