You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tika.apache.org by "Hong-Thai Nguyen (JIRA)" <ji...@apache.org> on 2013/07/23 15:46:49 UTC

[jira] [Updated] (TIKA-1152) Process stucks on parsing of a CHM file

     [ https://issues.apache.org/jira/browse/TIKA-1152?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Hong-Thai Nguyen updated TIKA-1152:
-----------------------------------

    Attachment: eventcombmt.chm
    
> Process stucks on parsing of a CHM file
> ---------------------------------------
>
>                 Key: TIKA-1152
>                 URL: https://issues.apache.org/jira/browse/TIKA-1152
>             Project: Tika
>          Issue Type: Bug
>          Components: parser
>    Affects Versions: 1.4
>         Environment: Windows/Linux
>            Reporter: Hong-Thai Nguyen
>            Priority: Critical
>             Fix For: 1.5
>
>         Attachments: eventcombmt.chm
>
>
> When parsing the attached CHM file (Microsoft Help file), the Java process gets stuck.
> {code}
> Thread[main,5,main]
> 	org.apache.tika.parser.chm.lzx.ChmLzxBlock.extractContent(ChmLzxBlock.java:203)
> 	org.apache.tika.parser.chm.lzx.ChmLzxBlock.<init>(ChmLzxBlock.java:77)
> 	org.apache.tika.parser.chm.core.ChmExtractor.extractChmEntry(ChmExtractor.java:338)
> 	org.apache.tika.parser.chm.CHMDocumentInformation.getContent(CHMDocumentInformation.java:72)
> 	org.apache.tika.parser.chm.CHMDocumentInformation.getText(CHMDocumentInformation.java:141)
> 	org.apache.tika.parser.chm.CHM2XHTML.process(CHM2XHTML.java:34)
> 	org.apache.tika.parser.chm.ChmParser.parse(ChmParser.java:51)
> 	org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:91)
> 	org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
> 	org.apache.tika.parser.AbstractParser.parse(AbstractParser.java:53)
> 	com.polyspot.document.converter.DocumentConverter.realizeConversion(DocumentConverter.java:192)
> 	com.polyspot.document.converter.DocumentConverter.convert(DocumentConverter.java:114)
> 	com.polyspot.wscrawlers.PsDocConverter.getConvertedDocument(PsDocConverter.java:77)
> 	com.polyspot.wscrawlers.Converter.getConvertedDocument(Converter.java:81)
> 	com.polyspot.wscrawlers.AbstractConverter.getDirectConvertedDocument(AbstractConverter.java:139)
> 	com.polyspot.connector.framework.convert.PES5ConversionService.convert(PES5ConversionService.java:43)
> 	com.polyspot.connector.framework.convert.ConversionService.findDocumentSplitterAndCallConvert(ConversionService.java:362)
> 	com.polyspot.connector.framework.convert.ConversionService.convertAndGenerateThumbnailForMasterFile(ConversionService.java:291)
> 	com.polyspot.connector.framework.processors.ConvertAndMergeMasterFile.process(ConvertAndMergeMasterFile.java:40)
> 	com.polyspot.connector.framework.processors.SequenceDocumentProcessor.process(SequenceDocumentProcessor.java:21)
> 	com.polyspot.connector.framework.plugins.DocumentBuilderPlugin.computeDocument(DocumentBuilderPlugin.java:48)
> 	com.polyspot.connector.framework.plugins.PluginsManager.computeDocument(PluginsManager.java:219)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.processOutOfDateNode(Orchestrator.java:201)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:172)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> 	com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> 	com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> 	com.polyspot.connector.knowledgetree.model.content.KnowledgeTreeDocumentMetadata.synchronizeAllChildren(KnowledgeTreeDocumentMetadata.java:98)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> 	com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> 	com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> 	com.polyspot.connector.knowledgetree.driver.db.DBKnowledgeTreeDriver.executeAllDocuments(DBKnowledgeTreeDriver.java:71)
> 	com.polyspot.connector.knowledgetree.driver.KnowledgeTreeDriver.executeAllDocuments(KnowledgeTreeDriver.java:107)
> 	com.polyspot.connector.knowledgetree.model.content.KnowledgeTreeContent.synchronizeAllChildren(KnowledgeTreeContent.java:28)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> 	com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> 	com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> 	com.polyspot.connector.knowledgetree.model.KnowledgeTreeServer.synchronizeAllChildren(KnowledgeTreeServer.java:80)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> 	com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> 	com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> 	com.polyspot.connector.knowledgetree.model.KnowledgeTreeRoot.synchronizeAllChildren(KnowledgeTreeRoot.java:81)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> 	com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> 	com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:92)
> 	com.polyspot.connector.framework.Connector.synchronize(Connector.java:51)
> 	com.polyspot.connector.framework.pes.Runner.synchronize(Runner.java:104)
> 	com.polyspot.connector.framework.pes.Runner.main(Runner.java:88)
> 	com.polyspot.connector.framework.pes.Runner.main(Runner.java:74)
> 	com.polyspot.connector.knowledgetree.MainPESKnowledgeTreeConnector.main(MainPESKnowledgeTreeConnector.java:26)
> {code}

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira

Re: [jira] [Updated] (TIKA-1152) Process stucks on parsing of a CHM file

Posted by Oleg Tikhonov <ol...@apache.org>.
Hi, can you attach the problematic file?
Thanks.


On Tue, Jul 23, 2013 at 4:46 PM, Hong-Thai Nguyen (JIRA) <ji...@apache.org> wrote:

>
>      [
> https://issues.apache.org/jira/browse/TIKA-1152?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel]
>
> Hong-Thai Nguyen updated TIKA-1152:
> -----------------------------------
>
>     Attachment: eventcombmt.chm
>
> > Process stucks on parsing of a CHM file
> > ---------------------------------------
> >
> >                 Key: TIKA-1152
> >                 URL: https://issues.apache.org/jira/browse/TIKA-1152
> >             Project: Tika
> >          Issue Type: Bug
> >          Components: parser
> >    Affects Versions: 1.4
> >         Environment: Windows/Linux
> >            Reporter: Hong-Thai Nguyen
> >            Priority: Critical
> >             Fix For: 1.5
> >
> >         Attachments: eventcombmt.chm
> >
> >
> > When parsing the attached CHM file (Microsoft Help file), the Java
> > process gets stuck.
> > {code}
> > Thread[main,5,main]
> >
> org.apache.tika.parser.chm.lzx.ChmLzxBlock.extractContent(ChmLzxBlock.java:203)
> >
> org.apache.tika.parser.chm.lzx.ChmLzxBlock.<init>(ChmLzxBlock.java:77)
> >
> org.apache.tika.parser.chm.core.ChmExtractor.extractChmEntry(ChmExtractor.java:338)
> >
> org.apache.tika.parser.chm.CHMDocumentInformation.getContent(CHMDocumentInformation.java:72)
> >
> org.apache.tika.parser.chm.CHMDocumentInformation.getText(CHMDocumentInformation.java:141)
> >       org.apache.tika.parser.chm.CHM2XHTML.process(CHM2XHTML.java:34)
> >       org.apache.tika.parser.chm.ChmParser.parse(ChmParser.java:51)
> >
> org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:91)
> >
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
> >       org.apache.tika.parser.AbstractParser.parse(AbstractParser.java:53)
> >
> com.polyspot.document.converter.DocumentConverter.realizeConversion(DocumentConverter.java:192)
> >
> com.polyspot.document.converter.DocumentConverter.convert(DocumentConverter.java:114)
> >
> com.polyspot.wscrawlers.PsDocConverter.getConvertedDocument(PsDocConverter.java:77)
> >
> com.polyspot.wscrawlers.Converter.getConvertedDocument(Converter.java:81)
> >
> com.polyspot.wscrawlers.AbstractConverter.getDirectConvertedDocument(AbstractConverter.java:139)
> >
> com.polyspot.connector.framework.convert.PES5ConversionService.convert(PES5ConversionService.java:43)
> >
> com.polyspot.connector.framework.convert.ConversionService.findDocumentSplitterAndCallConvert(ConversionService.java:362)
> >
> com.polyspot.connector.framework.convert.ConversionService.convertAndGenerateThumbnailForMasterFile(ConversionService.java:291)
> >
> com.polyspot.connector.framework.processors.ConvertAndMergeMasterFile.process(ConvertAndMergeMasterFile.java:40)
> >
> com.polyspot.connector.framework.processors.SequenceDocumentProcessor.process(SequenceDocumentProcessor.java:21)
> >
> com.polyspot.connector.framework.plugins.DocumentBuilderPlugin.computeDocument(DocumentBuilderPlugin.java:48)
> >
> com.polyspot.connector.framework.plugins.PluginsManager.computeDocument(PluginsManager.java:219)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processOutOfDateNode(Orchestrator.java:201)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:172)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> >
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> >
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> >
> com.polyspot.connector.knowledgetree.model.content.KnowledgeTreeDocumentMetadata.synchronizeAllChildren(KnowledgeTreeDocumentMetadata.java:98)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> >
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> >
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> >
> com.polyspot.connector.knowledgetree.driver.db.DBKnowledgeTreeDriver.executeAllDocuments(DBKnowledgeTreeDriver.java:71)
> >
> com.polyspot.connector.knowledgetree.driver.KnowledgeTreeDriver.executeAllDocuments(KnowledgeTreeDriver.java:107)
> >
> com.polyspot.connector.knowledgetree.model.content.KnowledgeTreeContent.synchronizeAllChildren(KnowledgeTreeContent.java:28)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> >
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> >
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> >
> com.polyspot.connector.knowledgetree.model.KnowledgeTreeServer.synchronizeAllChildren(KnowledgeTreeServer.java:80)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> >
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> >
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> >
> com.polyspot.connector.knowledgetree.model.KnowledgeTreeRoot.synchronizeAllChildren(KnowledgeTreeRoot.java:81)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> >
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:92)
> >
> com.polyspot.connector.framework.Connector.synchronize(Connector.java:51)
> >
> com.polyspot.connector.framework.pes.Runner.synchronize(Runner.java:104)
> >       com.polyspot.connector.framework.pes.Runner.main(Runner.java:88)
> >       com.polyspot.connector.framework.pes.Runner.main(Runner.java:74)
> >
> com.polyspot.connector.knowledgetree.MainPESKnowledgeTreeConnector.main(MainPESKnowledgeTreeConnector.java:26)
> > {code}
>
> --
> This message is automatically generated by JIRA.
> If you think it was sent incorrectly, please contact your JIRA
> administrators
> For more information on JIRA, see: http://www.atlassian.com/software/jira
>