You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tika.apache.org by "Hong-Thai Nguyen (JIRA)" <ji...@apache.org> on 2013/07/23 15:46:49 UTC
[jira] [Updated] (TIKA-1152) Process stucks on parsing of a CHM file
[ https://issues.apache.org/jira/browse/TIKA-1152?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hong-Thai Nguyen updated TIKA-1152:
-----------------------------------
Attachment: eventcombmt.chm
> Process stucks on parsing of a CHM file
> ---------------------------------------
>
> Key: TIKA-1152
> URL: https://issues.apache.org/jira/browse/TIKA-1152
> Project: Tika
> Issue Type: Bug
> Components: parser
> Affects Versions: 1.4
> Environment: Windows/Linux
> Reporter: Hong-Thai Nguyen
> Priority: Critical
> Fix For: 1.5
>
> Attachments: eventcombmt.chm
>
>
> When parsing the attached CHM file (a Microsoft Help file), the Java process gets stuck.
> {code}
> Thread[main,5,main]
> org.apache.tika.parser.chm.lzx.ChmLzxBlock.extractContent(ChmLzxBlock.java:203)
> org.apache.tika.parser.chm.lzx.ChmLzxBlock.<init>(ChmLzxBlock.java:77)
> org.apache.tika.parser.chm.core.ChmExtractor.extractChmEntry(ChmExtractor.java:338)
> org.apache.tika.parser.chm.CHMDocumentInformation.getContent(CHMDocumentInformation.java:72)
> org.apache.tika.parser.chm.CHMDocumentInformation.getText(CHMDocumentInformation.java:141)
> org.apache.tika.parser.chm.CHM2XHTML.process(CHM2XHTML.java:34)
> org.apache.tika.parser.chm.ChmParser.parse(ChmParser.java:51)
> org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:91)
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
> org.apache.tika.parser.AbstractParser.parse(AbstractParser.java:53)
> com.polyspot.document.converter.DocumentConverter.realizeConversion(DocumentConverter.java:192)
> com.polyspot.document.converter.DocumentConverter.convert(DocumentConverter.java:114)
> com.polyspot.wscrawlers.PsDocConverter.getConvertedDocument(PsDocConverter.java:77)
> com.polyspot.wscrawlers.Converter.getConvertedDocument(Converter.java:81)
> com.polyspot.wscrawlers.AbstractConverter.getDirectConvertedDocument(AbstractConverter.java:139)
> com.polyspot.connector.framework.convert.PES5ConversionService.convert(PES5ConversionService.java:43)
> com.polyspot.connector.framework.convert.ConversionService.findDocumentSplitterAndCallConvert(ConversionService.java:362)
> com.polyspot.connector.framework.convert.ConversionService.convertAndGenerateThumbnailForMasterFile(ConversionService.java:291)
> com.polyspot.connector.framework.processors.ConvertAndMergeMasterFile.process(ConvertAndMergeMasterFile.java:40)
> com.polyspot.connector.framework.processors.SequenceDocumentProcessor.process(SequenceDocumentProcessor.java:21)
> com.polyspot.connector.framework.plugins.DocumentBuilderPlugin.computeDocument(DocumentBuilderPlugin.java:48)
> com.polyspot.connector.framework.plugins.PluginsManager.computeDocument(PluginsManager.java:219)
> com.polyspot.connector.framework.orchestrators.Orchestrator.processOutOfDateNode(Orchestrator.java:201)
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:172)
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> com.polyspot.connector.knowledgetree.model.content.KnowledgeTreeDocumentMetadata.synchronizeAllChildren(KnowledgeTreeDocumentMetadata.java:98)
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> com.polyspot.connector.knowledgetree.driver.db.DBKnowledgeTreeDriver.executeAllDocuments(DBKnowledgeTreeDriver.java:71)
> com.polyspot.connector.knowledgetree.driver.KnowledgeTreeDriver.executeAllDocuments(KnowledgeTreeDriver.java:107)
> com.polyspot.connector.knowledgetree.model.content.KnowledgeTreeContent.synchronizeAllChildren(KnowledgeTreeContent.java:28)
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> com.polyspot.connector.knowledgetree.model.KnowledgeTreeServer.synchronizeAllChildren(KnowledgeTreeServer.java:80)
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> com.polyspot.connector.knowledgetree.model.KnowledgeTreeRoot.synchronizeAllChildren(KnowledgeTreeRoot.java:81)
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:92)
> com.polyspot.connector.framework.Connector.synchronize(Connector.java:51)
> com.polyspot.connector.framework.pes.Runner.synchronize(Runner.java:104)
> com.polyspot.connector.framework.pes.Runner.main(Runner.java:88)
> com.polyspot.connector.framework.pes.Runner.main(Runner.java:74)
> com.polyspot.connector.knowledgetree.MainPESKnowledgeTreeConnector.main(MainPESKnowledgeTreeConnector.java:26)
> {code}
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators.
For more information on JIRA, see: http://www.atlassian.com/software/jira
Re: [jira] [Updated] (TIKA-1152) Process stucks on parsing of a CHM file
Posted by Oleg Tikhonov <ol...@apache.org>.
Hi, can you attach the problematic file?
Thanks.
On Tue, Jul 23, 2013 at 4:46 PM, Hong-Thai Nguyen (JIRA) <ji...@apache.org> wrote:
>
> [
> https://issues.apache.org/jira/browse/TIKA-1152?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel]
>
> Hong-Thai Nguyen updated TIKA-1152:
> -----------------------------------
>
> Attachment: eventcombmt.chm
>
> > Process stucks on parsing of a CHM file
> > ---------------------------------------
> >
> > Key: TIKA-1152
> > URL: https://issues.apache.org/jira/browse/TIKA-1152
> > Project: Tika
> > Issue Type: Bug
> > Components: parser
> > Affects Versions: 1.4
> > Environment: Windows/Linux
> > Reporter: Hong-Thai Nguyen
> > Priority: Critical
> > Fix For: 1.5
> >
> > Attachments: eventcombmt.chm
> >
> >
> > When parsing the attached CHM file (a Microsoft Help file), the Java
> > process gets stuck.
> > {code}
> > Thread[main,5,main]
> >
> org.apache.tika.parser.chm.lzx.ChmLzxBlock.extractContent(ChmLzxBlock.java:203)
> >
> org.apache.tika.parser.chm.lzx.ChmLzxBlock.<init>(ChmLzxBlock.java:77)
> >
> org.apache.tika.parser.chm.core.ChmExtractor.extractChmEntry(ChmExtractor.java:338)
> >
> org.apache.tika.parser.chm.CHMDocumentInformation.getContent(CHMDocumentInformation.java:72)
> >
> org.apache.tika.parser.chm.CHMDocumentInformation.getText(CHMDocumentInformation.java:141)
> > org.apache.tika.parser.chm.CHM2XHTML.process(CHM2XHTML.java:34)
> > org.apache.tika.parser.chm.ChmParser.parse(ChmParser.java:51)
> >
> org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:91)
> >
> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
> > org.apache.tika.parser.AbstractParser.parse(AbstractParser.java:53)
> >
> com.polyspot.document.converter.DocumentConverter.realizeConversion(DocumentConverter.java:192)
> >
> com.polyspot.document.converter.DocumentConverter.convert(DocumentConverter.java:114)
> >
> com.polyspot.wscrawlers.PsDocConverter.getConvertedDocument(PsDocConverter.java:77)
> >
> com.polyspot.wscrawlers.Converter.getConvertedDocument(Converter.java:81)
> >
> com.polyspot.wscrawlers.AbstractConverter.getDirectConvertedDocument(AbstractConverter.java:139)
> >
> com.polyspot.connector.framework.convert.PES5ConversionService.convert(PES5ConversionService.java:43)
> >
> com.polyspot.connector.framework.convert.ConversionService.findDocumentSplitterAndCallConvert(ConversionService.java:362)
> >
> com.polyspot.connector.framework.convert.ConversionService.convertAndGenerateThumbnailForMasterFile(ConversionService.java:291)
> >
> com.polyspot.connector.framework.processors.ConvertAndMergeMasterFile.process(ConvertAndMergeMasterFile.java:40)
> >
> com.polyspot.connector.framework.processors.SequenceDocumentProcessor.process(SequenceDocumentProcessor.java:21)
> >
> com.polyspot.connector.framework.plugins.DocumentBuilderPlugin.computeDocument(DocumentBuilderPlugin.java:48)
> >
> com.polyspot.connector.framework.plugins.PluginsManager.computeDocument(PluginsManager.java:219)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processOutOfDateNode(Orchestrator.java:201)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:172)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> >
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> >
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> >
> com.polyspot.connector.knowledgetree.model.content.KnowledgeTreeDocumentMetadata.synchronizeAllChildren(KnowledgeTreeDocumentMetadata.java:98)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> >
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> >
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> >
> com.polyspot.connector.knowledgetree.driver.db.DBKnowledgeTreeDriver.executeAllDocuments(DBKnowledgeTreeDriver.java:71)
> >
> com.polyspot.connector.knowledgetree.driver.KnowledgeTreeDriver.executeAllDocuments(KnowledgeTreeDriver.java:107)
> >
> com.polyspot.connector.knowledgetree.model.content.KnowledgeTreeContent.synchronizeAllChildren(KnowledgeTreeContent.java:28)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> >
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> >
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> >
> com.polyspot.connector.knowledgetree.model.KnowledgeTreeServer.synchronizeAllChildren(KnowledgeTreeServer.java:80)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> >
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> >
> com.polyspot.connector.framework.MonitoredNodeExecutor.requestChildExecution(MonitoredNodeExecutor.java:29)
> >
> com.polyspot.connector.knowledgetree.model.KnowledgeTreeRoot.synchronizeAllChildren(KnowledgeTreeRoot.java:81)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.syncChildren(Orchestrator.java:311)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processGrantedNode(Orchestrator.java:177)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.processNode(Orchestrator.java:237)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:288)
> >
> com.polyspot.connector.framework.orchestrators.OrchestratorMonoThreaded.requestSynchronizeOnceCreated(OrchestratorMonoThreaded.java:16)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.requestSynchronize(Orchestrator.java:108)
> >
> com.polyspot.connector.framework.orchestrators.Orchestrator.synchronize(Orchestrator.java:92)
> >
> com.polyspot.connector.framework.Connector.synchronize(Connector.java:51)
> >
> com.polyspot.connector.framework.pes.Runner.synchronize(Runner.java:104)
> > com.polyspot.connector.framework.pes.Runner.main(Runner.java:88)
> > com.polyspot.connector.framework.pes.Runner.main(Runner.java:74)
> >
> com.polyspot.connector.knowledgetree.MainPESKnowledgeTreeConnector.main(MainPESKnowledgeTreeConnector.java:26)
> > {code}
>
> --
> This message is automatically generated by JIRA.
> If you think it was sent incorrectly, please contact your JIRA
> administrators
> For more information on JIRA, see: http://www.atlassian.com/software/jira
>