You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@any23.apache.org by "Ruben Verborgh (JIRA)" <ji...@apache.org> on 2013/08/13 21:38:47 UTC

[jira] [Updated] (ANY23-166) Parsing crashes with attributes that don't use quotes

     [ https://issues.apache.org/jira/browse/ANY23-166?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Ruben Verborgh updated ANY23-166:
---------------------------------

    Description: 
When trying http://ruben.verborgh.org/tmp/og-test.html in the validator, it fails with:
Internal error.
================================================================
java.lang.IllegalArgumentException: Invalid content ''
	at org.apache.any23.extractor.microdata.ItemPropValue.<init>(ItemPropValue.java:89)
	at org.apache.any23.extractor.microdata.MicrodataParser.getPropertyValue(MicrodataParser.java:341)
	at org.apache.any23.extractor.microdata.MicrodataParser.getItemProps(MicrodataParser.java:394)
	at org.apache.any23.extractor.microdata.MicrodataParser.getItemScope(MicrodataParser.java:471)
	at org.apache.any23.extractor.microdata.MicrodataParser.getMicrodata(MicrodataParser.java:186)
	at org.apache.any23.extractor.microdata.MicrodataParser.getMicrodata(MicrodataParser.java:203)
	at org.apache.any23.extractor.microdata.MicrodataExtractor.run(MicrodataExtractor.java:100)
	at org.apache.any23.extractor.microdata.MicrodataExtractor.run(MicrodataExtractor.java:62)
	at org.apache.any23.extractor.SingleDocumentExtraction.runExtractor(SingleDocumentExtraction.java:477)
	at org.apache.any23.extractor.SingleDocumentExtraction.run(SingleDocumentExtraction.java:260)
	at org.apache.any23.Any23.extract(Any23.java:294)
	at org.apache.any23.Any23.extract(Any23.java:446)
	at org.apache.any23.servlet.WebResponder.runExtraction(WebResponder.java:113)
	at org.apache.any23.servlet.Servlet.doGet(Servlet.java:74)
	at javax.servlet.http.HttpServlet.service(HttpServlet.java:617)
	at javax.servlet.http.HttpServlet.service(HttpServlet.java:717)
	at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:290)
	at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
	at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233)
	at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191)
	at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127)
	at com.googlecode.psiprobe.Tomcat60AgentValve.invoke(Tomcat60AgentValve.java:30)
	at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102)
	at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109)
	at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:293)
	at org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:859)
	at org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.process(Http11Protocol.java:602)
	at org.apache.tomcat.util.net.JIoEndpoint$Worker.run(JIoEndpoint.java:489)
	at java.lang.Thread.run(Thread.java:662)
================================================================

Source: http://any23.org/any23/?format=best&uri=http%3A%2F%2Fruben.verborgh.org%2Ftmp%2Fog-test.html&validation-mode=validate-fix

Note how a quote is missing in the prefix attribute of the html tag.

The strange thing is that editing the document body makes the error disappear, depending on what you remove. For instance: http://any23.org/any23/?format=best&uri=http%3A%2F%2Fruben.verborgh.org%2Ftmp%2Fog-test2.html&validation-mode=validate-fix


The same happens with http://ruben.verborgh.org/tmp/og-test3.html
which is the smallest example I could find.

  was:
When trying http://ruben.verborgh.org/tmp/og-test.html in the validator, it fails with:
Internal error.
================================================================
java.lang.IllegalArgumentException: Invalid content ''
	at org.apache.any23.extractor.microdata.ItemPropValue.<init>(ItemPropValue.java:89)
	at org.apache.any23.extractor.microdata.MicrodataParser.getPropertyValue(MicrodataParser.java:341)
	at org.apache.any23.extractor.microdata.MicrodataParser.getItemProps(MicrodataParser.java:394)
	at org.apache.any23.extractor.microdata.MicrodataParser.getItemScope(MicrodataParser.java:471)
	at org.apache.any23.extractor.microdata.MicrodataParser.getMicrodata(MicrodataParser.java:186)
	at org.apache.any23.extractor.microdata.MicrodataParser.getMicrodata(MicrodataParser.java:203)
	at org.apache.any23.extractor.microdata.MicrodataExtractor.run(MicrodataExtractor.java:100)
	at org.apache.any23.extractor.microdata.MicrodataExtractor.run(MicrodataExtractor.java:62)
	at org.apache.any23.extractor.SingleDocumentExtraction.runExtractor(SingleDocumentExtraction.java:477)
	at org.apache.any23.extractor.SingleDocumentExtraction.run(SingleDocumentExtraction.java:260)
	at org.apache.any23.Any23.extract(Any23.java:294)
	at org.apache.any23.Any23.extract(Any23.java:446)
	at org.apache.any23.servlet.WebResponder.runExtraction(WebResponder.java:113)
	at org.apache.any23.servlet.Servlet.doGet(Servlet.java:74)
	at javax.servlet.http.HttpServlet.service(HttpServlet.java:617)
	at javax.servlet.http.HttpServlet.service(HttpServlet.java:717)
	at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:290)
	at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
	at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233)
	at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191)
	at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127)
	at com.googlecode.psiprobe.Tomcat60AgentValve.invoke(Tomcat60AgentValve.java:30)
	at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102)
	at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109)
	at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:293)
	at org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:859)
	at org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.process(Http11Protocol.java:602)
	at org.apache.tomcat.util.net.JIoEndpoint$Worker.run(JIoEndpoint.java:489)
	at java.lang.Thread.run(Thread.java:662)
================================================================

Source: http://any23.org/any23/?format=best&uri=http%3A%2F%2Fruben.verborgh.org%2Ftmp%2Fog-test.html&validation-mode=validate-fix

Note how a quote is missing in the prefix attribute of the html tag.

The strange thing is that editing the document body makes the error disappear, depending on what you remove. For instance: http://any23.org/any23/?format=best&uri=http%3A%2F%2Fruben.verborgh.org%2Ftmp%2Fog-test2.html&validation-mode=validate-fix

        Summary: Parsing crashes with attributes that don't use quotes  (was: Parsing crashes with missing quote in HTML attribute)
    
> Parsing crashes with attributes that don't use quotes
> -----------------------------------------------------
>
>                 Key: ANY23-166
>                 URL: https://issues.apache.org/jira/browse/ANY23-166
>             Project: Apache Any23
>          Issue Type: Bug
>            Reporter: Ruben Verborgh
>
> When trying http://ruben.verborgh.org/tmp/og-test.html in the validator, it fails with:
> Internal error.
> ================================================================
> java.lang.IllegalArgumentException: Invalid content ''
> 	at org.apache.any23.extractor.microdata.ItemPropValue.<init>(ItemPropValue.java:89)
> 	at org.apache.any23.extractor.microdata.MicrodataParser.getPropertyValue(MicrodataParser.java:341)
> 	at org.apache.any23.extractor.microdata.MicrodataParser.getItemProps(MicrodataParser.java:394)
> 	at org.apache.any23.extractor.microdata.MicrodataParser.getItemScope(MicrodataParser.java:471)
> 	at org.apache.any23.extractor.microdata.MicrodataParser.getMicrodata(MicrodataParser.java:186)
> 	at org.apache.any23.extractor.microdata.MicrodataParser.getMicrodata(MicrodataParser.java:203)
> 	at org.apache.any23.extractor.microdata.MicrodataExtractor.run(MicrodataExtractor.java:100)
> 	at org.apache.any23.extractor.microdata.MicrodataExtractor.run(MicrodataExtractor.java:62)
> 	at org.apache.any23.extractor.SingleDocumentExtraction.runExtractor(SingleDocumentExtraction.java:477)
> 	at org.apache.any23.extractor.SingleDocumentExtraction.run(SingleDocumentExtraction.java:260)
> 	at org.apache.any23.Any23.extract(Any23.java:294)
> 	at org.apache.any23.Any23.extract(Any23.java:446)
> 	at org.apache.any23.servlet.WebResponder.runExtraction(WebResponder.java:113)
> 	at org.apache.any23.servlet.Servlet.doGet(Servlet.java:74)
> 	at javax.servlet.http.HttpServlet.service(HttpServlet.java:617)
> 	at javax.servlet.http.HttpServlet.service(HttpServlet.java:717)
> 	at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:290)
> 	at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
> 	at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233)
> 	at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191)
> 	at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127)
> 	at com.googlecode.psiprobe.Tomcat60AgentValve.invoke(Tomcat60AgentValve.java:30)
> 	at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102)
> 	at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109)
> 	at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:293)
> 	at org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:859)
> 	at org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.process(Http11Protocol.java:602)
> 	at org.apache.tomcat.util.net.JIoEndpoint$Worker.run(JIoEndpoint.java:489)
> 	at java.lang.Thread.run(Thread.java:662)
> ================================================================
> Source: http://any23.org/any23/?format=best&uri=http%3A%2F%2Fruben.verborgh.org%2Ftmp%2Fog-test.html&validation-mode=validate-fix
> Note how a quote is missing in the prefix attribute of the html tag.
> The strange thing is that editing the document body makes the error disappear, depending on what you remove. For instance: http://any23.org/any23/?format=best&uri=http%3A%2F%2Fruben.verborgh.org%2Ftmp%2Fog-test2.html&validation-mode=validate-fix
> The same happens with http://ruben.verborgh.org/tmp/og-test3.html
> which is the smallest example I could find.

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators.
For more information on JIRA, see: http://www.atlassian.com/software/jira