You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tika.apache.org by "Tyler Palsulich (JIRA)" <ji...@apache.org> on 2015/03/02 20:26:04 UTC
[jira] [Updated] (TIKA-954) Tika throws OOM and GC limited exceeded on Microsoft docx file
[ https://issues.apache.org/jira/browse/TIKA-954?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Tyler Palsulich updated TIKA-954:
---------------------------------
Description:
Stack trace produced with the attached docx file:
{code}
2012-07-13_04:45:36.86910 java.lang.OutOfMemoryError: GC overhead limit exceeded
2012-07-13_04:45:36.86932 Dumping heap to /var/log/oom/content-extractor-9998.dump.1 ...
2012-07-13_04:46:47.38774 Heap dump file created [925402960 bytes in 70.518 secs]
2012-07-13_04:46:57.17658 java.lang.OutOfMemoryError: GC overhead limit exceeded
2012-07-13_04:46:57.17718 at java.lang.String.substring(String.java:1939)
2012-07-13_04:46:57.17736 at org.apache.xmlbeans.impl.store.Locale$SaxHandler.startElement(Locale.java:3254)
2012-07-13_04:46:57.17750 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.reportStartTag(Piccolo.java:1082)
2012-07-13_04:46:57.17763 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseAttributesNS(PiccoloLexer.java:1822)
2012-07-13_04:46:57.17777 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseOpenTagNS(PiccoloLexer.java:1521)
2012-07-13_04:46:57.17793 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseTagNS(PiccoloLexer.java:1362)
2012-07-13_04:46:57.17806 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseXMLNS(PiccoloLexer.java:1293)
2012-07-13_04:46:57.17819 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseXML(PiccoloLexer.java:1261)
2012-07-13_04:46:57.17839 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yylex(PiccoloLexer.java:4808)
2012-07-13_04:46:57.17853 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yylex(Piccolo.java:1290)
2012-07-13_04:46:57.17868 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yyparse(Piccolo.java:1400)
2012-07-13_04:46:57.17883 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.parse(Piccolo.java:714)
2012-07-13_04:46:57.17897 at org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3439)
2012-07-13_04:46:57.17911 at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1270)
2012-07-13_04:46:57.17929 at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1257)
2012-07-13_04:46:57.17945 at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345)
2012-07-13_04:46:57.17962 at org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.parse(Unknown Source)
2012-07-13_04:46:57.17978 at org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:134)
2012-07-13_04:46:57.17991 at org.apache.poi.POIXMLDocument.load(POIXMLDocument.java:159)
2012-07-13_04:46:57.18004 at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:116)
2012-07-13_04:46:57.18019 at org.apache.poi.xwpf.extractor.XWPFWordExtractor.<init>(XWPFWordExtractor.java:53)
2012-07-13_04:46:57.18035 at org.apache.poi.extractor.ExtractorFactory.createExtractor(ExtractorFactory.java:180)
2012-07-13_04:46:57.18051 at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:87)
2012-07-13_04:46:57.18066 at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:82)
2012-07-13_04:46:57.18078 at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
2012-07-13_04:46:57.18090 at org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:91)
2012-07-13_04:46:57.18103 at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
2012-07-13_04:46:57.18115 at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
2012-07-13_04:46:57.18127 at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:136)
2012-07-13_04:46:57.18146 at org.apache.tika.server.TikaResource$3.write(TikaResource.java:138)
2012-07-13_04:46:57.18158 at org.apache.cxf.jaxrs.provider.BinaryDataProvider.writeTo(BinaryDataProvider.java:117)
2012-07-13_04:46:57.18169 at org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.serializeMessage(JAXRSOutInterceptor.java:257)
{code}
was:
Stack trace produced with attached docx file
2012-07-13_04:45:36.86910 java.lang.OutOfMemoryError: GC overhead limit exceeded
2012-07-13_04:45:36.86932 Dumping heap to /var/log/oom/content-extractor-9998.dump.1 ...
2012-07-13_04:46:47.38774 Heap dump file created [925402960 bytes in 70.518 secs]
2012-07-13_04:46:57.17658 java.lang.OutOfMemoryError: GC overhead limit exceeded
2012-07-13_04:46:57.17718 at java.lang.String.substring(String.java:1939)
2012-07-13_04:46:57.17736 at org.apache.xmlbeans.impl.store.Locale$SaxHandler.startElement(Locale.java:3254)
2012-07-13_04:46:57.17750 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.reportStartTag(Piccolo.java:1082)
2012-07-13_04:46:57.17763 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseAttributesNS(PiccoloLexer.java:1822)
2012-07-13_04:46:57.17777 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseOpenTagNS(PiccoloLexer.java:1521)
2012-07-13_04:46:57.17793 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseTagNS(PiccoloLexer.java:1362)
2012-07-13_04:46:57.17806 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseXMLNS(PiccoloLexer.java:1293)
2012-07-13_04:46:57.17819 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseXML(PiccoloLexer.java:1261)
2012-07-13_04:46:57.17839 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yylex(PiccoloLexer.java:4808)
2012-07-13_04:46:57.17853 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yylex(Piccolo.java:1290)
2012-07-13_04:46:57.17868 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yyparse(Piccolo.java:1400)
2012-07-13_04:46:57.17883 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.parse(Piccolo.java:714)
2012-07-13_04:46:57.17897 at org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3439)
2012-07-13_04:46:57.17911 at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1270)
2012-07-13_04:46:57.17929 at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1257)
2012-07-13_04:46:57.17945 at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345)
2012-07-13_04:46:57.17962 at org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.parse(Unknown Source)
2012-07-13_04:46:57.17978 at org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:134)
2012-07-13_04:46:57.17991 at org.apache.poi.POIXMLDocument.load(POIXMLDocument.java:159)
2012-07-13_04:46:57.18004 at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:116)
2012-07-13_04:46:57.18019 at org.apache.poi.xwpf.extractor.XWPFWordExtractor.<init>(XWPFWordExtractor.java:53)
2012-07-13_04:46:57.18035 at org.apache.poi.extractor.ExtractorFactory.createExtractor(ExtractorFactory.java:180)
2012-07-13_04:46:57.18051 at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:87)
2012-07-13_04:46:57.18066 at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:82)
2012-07-13_04:46:57.18078 at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
2012-07-13_04:46:57.18090 at org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:91)
2012-07-13_04:46:57.18103 at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
2012-07-13_04:46:57.18115 at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
2012-07-13_04:46:57.18127 at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:136)
2012-07-13_04:46:57.18146 at org.apache.tika.server.TikaResource$3.write(TikaResource.java:138)
2012-07-13_04:46:57.18158 at org.apache.cxf.jaxrs.provider.BinaryDataProvider.writeTo(BinaryDataProvider.java:117)
2012-07-13_04:46:57.18169 at org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.serializeMessage(JAXRSOutInterceptor.java:257)
> Tika throws OOM and GC limited exceeded on Microsoft docx file
> --------------------------------------------------------------
>
> Key: TIKA-954
> URL: https://issues.apache.org/jira/browse/TIKA-954
> Project: Tika
> Issue Type: Bug
> Components: parser
> Affects Versions: 1.2
> Environment: Linux (CentOS 4.x)
> Reporter: Rob Tulloh
> Attachments: 91468cee-fb0a-4692-adfd-c2b3cb0613da.docx, Word.docx
>
>
> Stack trace produced with the attached docx file:
> {code}
> 2012-07-13_04:45:36.86910 java.lang.OutOfMemoryError: GC overhead limit exceeded
> 2012-07-13_04:45:36.86932 Dumping heap to /var/log/oom/content-extractor-9998.dump.1 ...
> 2012-07-13_04:46:47.38774 Heap dump file created [925402960 bytes in 70.518 secs]
> 2012-07-13_04:46:57.17658 java.lang.OutOfMemoryError: GC overhead limit exceeded
> 2012-07-13_04:46:57.17718 at java.lang.String.substring(String.java:1939)
> 2012-07-13_04:46:57.17736 at org.apache.xmlbeans.impl.store.Locale$SaxHandler.startElement(Locale.java:3254)
> 2012-07-13_04:46:57.17750 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.reportStartTag(Piccolo.java:1082)
> 2012-07-13_04:46:57.17763 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseAttributesNS(PiccoloLexer.java:1822)
> 2012-07-13_04:46:57.17777 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseOpenTagNS(PiccoloLexer.java:1521)
> 2012-07-13_04:46:57.17793 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseTagNS(PiccoloLexer.java:1362)
> 2012-07-13_04:46:57.17806 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseXMLNS(PiccoloLexer.java:1293)
> 2012-07-13_04:46:57.17819 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseXML(PiccoloLexer.java:1261)
> 2012-07-13_04:46:57.17839 at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yylex(PiccoloLexer.java:4808)
> 2012-07-13_04:46:57.17853 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yylex(Piccolo.java:1290)
> 2012-07-13_04:46:57.17868 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yyparse(Piccolo.java:1400)
> 2012-07-13_04:46:57.17883 at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.parse(Piccolo.java:714)
> 2012-07-13_04:46:57.17897 at org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3439)
> 2012-07-13_04:46:57.17911 at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1270)
> 2012-07-13_04:46:57.17929 at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1257)
> 2012-07-13_04:46:57.17945 at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345)
> 2012-07-13_04:46:57.17962 at org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.parse(Unknown Source)
> 2012-07-13_04:46:57.17978 at org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:134)
> 2012-07-13_04:46:57.17991 at org.apache.poi.POIXMLDocument.load(POIXMLDocument.java:159)
> 2012-07-13_04:46:57.18004 at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:116)
> 2012-07-13_04:46:57.18019 at org.apache.poi.xwpf.extractor.XWPFWordExtractor.<init>(XWPFWordExtractor.java:53)
> 2012-07-13_04:46:57.18035 at org.apache.poi.extractor.ExtractorFactory.createExtractor(ExtractorFactory.java:180)
> 2012-07-13_04:46:57.18051 at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:87)
> 2012-07-13_04:46:57.18066 at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:82)
> 2012-07-13_04:46:57.18078 at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
> 2012-07-13_04:46:57.18090 at org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:91)
> 2012-07-13_04:46:57.18103 at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
> 2012-07-13_04:46:57.18115 at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
> 2012-07-13_04:46:57.18127 at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:136)
> 2012-07-13_04:46:57.18146 at org.apache.tika.server.TikaResource$3.write(TikaResource.java:138)
> 2012-07-13_04:46:57.18158 at org.apache.cxf.jaxrs.provider.BinaryDataProvider.writeTo(BinaryDataProvider.java:117)
> 2012-07-13_04:46:57.18169 at org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.serializeMessage(JAXRSOutInterceptor.java:257)
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)