You are viewing a plain text version of this content. The canonical link for it is here.
Posted to doxia-commits@maven.apache.org by lt...@apache.org on 2010/05/07 11:47:55 UTC
svn commit: r942043 - in
/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia:
parser/AbstractXmlParser.java util/XmlValidator.java
Author: ltheussl
Date: Fri May 7 09:47:55 2010
New Revision: 942043
URL: http://svn.apache.org/viewvc?rev=942043&view=rev
Log:
extract validation routine into a separate class
Added:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java?rev=942043&r1=942042&r2=942043&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java (original)
+++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java Fri May 7 09:47:55 2010
@@ -37,20 +37,19 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import javax.xml.XMLConstants;
-
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
-import org.apache.maven.doxia.logging.Log;
+
import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.markup.XmlMarkup;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.SinkEventAttributeSet;
import org.apache.maven.doxia.util.HtmlTools;
+import org.apache.maven.doxia.util.XmlValidator;
import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.IOUtil;
@@ -62,10 +61,6 @@ import org.codehaus.plexus.util.xml.pull
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
-import org.xml.sax.SAXParseException;
-import org.xml.sax.XMLReader;
-import org.xml.sax.helpers.DefaultHandler;
-import org.xml.sax.helpers.XMLReaderFactory;
/**
* An abstract class that defines some convenience methods for <code>XML</code> parsers.
@@ -96,15 +91,6 @@ public abstract class AbstractXmlParser
private static final Pattern PATTERN_ENTITY_2 =
Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
- /**
- * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*"
- * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>.
- */
- private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*" + DOCTYPE_START + "([^>]*)>.*" );
-
- /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/#NT-Name */
- private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" );
-
private boolean ignorableWhitespace;
private boolean collapsibleWhitespace;
@@ -115,9 +101,6 @@ public abstract class AbstractXmlParser
private boolean validate = true;
- /** lazy xmlReader to validate xml content*/
- private XMLReader xmlReader;
-
/** {@inheritDoc} */
public void parse( Reader source, Sink sink )
throws ParseException
@@ -137,7 +120,7 @@ public abstract class AbstractXmlParser
throw new ParseException( "Error reading the model: " + e.getMessage(), e );
}
- validate( content );
+ new XmlValidator( getLog() ).validate( content );
source = new StringReader( content );
}
@@ -592,82 +575,6 @@ public abstract class AbstractXmlParser
// ----------------------------------------------------------------------
/**
- * Validate an XML content with SAX.
- *
- * @param content a not null xml content
- * @throws ParseException if any.
- */
- private void validate( String content )
- throws ParseException
- {
- try
- {
- // 1 if there's a doctype
- boolean hasDoctype = false;
- Matcher matcher = PATTERN_DOCTYPE.matcher( content );
- if ( matcher.find() )
- {
- hasDoctype = true;
- }
-
- // 2 check for an xmlns instance
- boolean hasXsd = false;
- matcher = PATTERN_TAG.matcher( content );
- if ( matcher.find() )
- {
- String value = matcher.group( 2 );
-
- if ( value.indexOf( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) != -1 )
- {
- hasXsd = true;
- }
- }
-
- // 3 validate content if doctype or xsd
- if ( hasDoctype || hasXsd )
- {
- if ( getLog().isDebugEnabled() )
- {
- getLog().debug( "Validating the content..." );
- }
- getXmlReader( hasXsd && hasDoctype ).parse( new InputSource( new StringReader( content ) ) );
- }
- }
- catch ( IOException e )
- {
- throw new ParseException( "Error validating the model: " + e.getMessage(), e );
- }
- catch ( SAXException e )
- {
- throw new ParseException( "Error validating the model: " + e.getMessage(), e );
- }
- }
-
- /**
- * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>.
- * @return an xmlReader instance.
- * @throws SAXException if any
- */
- private XMLReader getXmlReader( boolean hasDtdAndXsd )
- throws SAXException
- {
- if ( xmlReader == null )
- {
- MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() );
-
- xmlReader = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" );
- xmlReader.setFeature( "http://xml.org/sax/features/validation", true );
- xmlReader.setFeature( "http://apache.org/xml/features/validation/schema", true );
- xmlReader.setErrorHandler( errorHandler );
- xmlReader.setEntityResolver( new CachedFileEntityResolver() );
- }
-
- ( (MessagesErrorHandler) xmlReader.getErrorHandler() ).setHasDtdAndXsd( hasDtdAndXsd );
-
- return xmlReader;
- }
-
- /**
* Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
* <br/>
* By default, we exclude the default XML entities: &amp;, &lt;, &gt;, &quot; and &apos;.
@@ -787,125 +694,6 @@ public abstract class AbstractXmlParser
}
/**
- * Convenience class to beautify <code>SAXParseException</code> messages.
- */
- static class MessagesErrorHandler
- extends DefaultHandler
- {
- private static final int TYPE_UNKNOWN = 0;
-
- private static final int TYPE_WARNING = 1;
-
- private static final int TYPE_ERROR = 2;
-
- private static final int TYPE_FATAL = 3;
-
- /** @see org/apache/xerces/impl/msg/XMLMessages.properties#MSG_ELEMENT_NOT_DECLARED */
- private static final Pattern ELEMENT_TYPE_PATTERN =
- Pattern.compile( "Element type \".*\" must be declared.", Pattern.DOTALL );
-
- private final Log log;
-
- private boolean hasDtdAndXsd;
-
- public MessagesErrorHandler( Log log )
- {
- this.log = log;
- }
-
- /**
- * @param hasDtdAndXsd the hasDtdAndXsd to set
- */
- protected void setHasDtdAndXsd( boolean hasDtdAndXsd )
- {
- this.hasDtdAndXsd = hasDtdAndXsd;
- }
-
- /** {@inheritDoc} */
- public void warning( SAXParseException e )
- throws SAXException
- {
- processException( TYPE_WARNING, e );
- }
-
- /** {@inheritDoc} */
- public void error( SAXParseException e )
- throws SAXException
- {
- // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities
- // like &nbsp;
- // See http://xsd.stylusstudio.com/2001Nov/post08021.htm
- if ( !hasDtdAndXsd )
- {
- processException( TYPE_ERROR, e );
- return;
- }
-
- Matcher m = ELEMENT_TYPE_PATTERN.matcher( e.getMessage() );
- if ( !m.find() )
- {
- processException( TYPE_ERROR, e );
- }
- }
-
- /** {@inheritDoc} */
- public void fatalError( SAXParseException e )
- throws SAXException
- {
- processException( TYPE_FATAL, e );
- }
-
- private void processException( int type, SAXParseException e )
- throws SAXException
- {
- StringBuffer message = new StringBuffer();
-
- switch ( type )
- {
- case TYPE_WARNING:
- message.append( "Warning:" );
- break;
-
- case TYPE_ERROR:
- message.append( "Error:" );
- break;
-
- case TYPE_FATAL:
- message.append( "Fatal error:" );
- break;
-
- case TYPE_UNKNOWN:
- default:
- message.append( "Unknown:" );
- break;
- }
-
- message.append( EOL );
- message.append( " Public ID: " + e.getPublicId() ).append( EOL );
- message.append( " System ID: " + e.getSystemId() ).append( EOL );
- message.append( " Line number: " + e.getLineNumber() ).append( EOL );
- message.append( " Column number: " + e.getColumnNumber() ).append( EOL );
- message.append( " Message: " + e.getMessage() ).append( EOL );
-
- switch ( type )
- {
- case TYPE_WARNING:
- if ( log.isWarnEnabled() )
- {
- log.warn( message.toString() );
- }
- break;
-
- case TYPE_UNKNOWN:
- case TYPE_ERROR:
- case TYPE_FATAL:
- default:
- throw new SAXException( message.toString() );
- }
- }
- }
-
- /**
* Implementation of the callback mechanism <code>EntityResolver</code>.
* Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
*/
Added: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java?rev=942043&view=auto
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java (added)
+++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java Fri May 7 09:47:55 2010
@@ -0,0 +1,271 @@
+package org.apache.maven.doxia.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.xml.XMLConstants;
+
+import org.apache.maven.doxia.logging.Log;
+import org.apache.maven.doxia.markup.XmlMarkup;
+import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver;
+import org.apache.maven.doxia.parser.ParseException;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.helpers.XMLReaderFactory;
+
+/**
+ * A class to validate xml documents with a validating (DTD and XML schema) Xerces SAX parser.
+ *
+ * @version $Id$
+ * @since 1.1.3
+ */
+public class XmlValidator
+{
+ /**
+ * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*"
+ * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>.
+ */
+ private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*" + XmlMarkup.DOCTYPE_START + "([^>]*)>.*" );
+
+ /** Tag pattern, name as in http://www.w3.org/TR/REC-xml/#NT-Name; group 2 is the rest of the tag up to '>'. */
+ private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" );
+
+ /** lazy xmlReader to validate xml content, created on first use by getXmlReader and then reused */
+ private XMLReader xmlReader;
+
+ private Log logger;
+
+ /**
+ * Constructor, the given logger receives the debug and warning messages of this validator.
+ *
+ * @param log a logger, not null (not checked here).
+ */
+ public XmlValidator( Log log )
+ {
+ this.logger = log;
+ }
+
+ /**
+ * Validate an XML content with SAX, but only if a doctype or the XML Schema instance namespace is detected.
+ *
+ * @param content a not null xml content
+ * @throws ParseException wrapping any IOException or SAXException raised during validation.
+ */
+ public void validate( String content )
+ throws ParseException
+ {
+ try
+ {
+ // 1 check if there's a doctype
+ boolean hasDoctype = false;
+ Matcher matcher = PATTERN_DOCTYPE.matcher( content );
+ if ( matcher.find() )
+ {
+ hasDoctype = true;
+ }
+
+ // 2 check for the XML Schema instance namespace in the remainder (group 2) of the matched tag
+ boolean hasXsd = false;
+ matcher = PATTERN_TAG.matcher( content );
+ if ( matcher.find() )
+ {
+ String value = matcher.group( 2 );
+
+ if ( value.indexOf( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) != -1 )
+ {
+ hasXsd = true;
+ }
+ }
+
+ // 3 validate content if doctype or xsd, otherwise the content is not parsed at all
+ if ( hasDoctype || hasXsd )
+ {
+ getLog().debug( "Validating the content..." );
+ getXmlReader( hasXsd && hasDoctype ).parse( new InputSource( new StringReader( content ) ) );
+ }
+ }
+ catch ( IOException e )
+ {
+ throw new ParseException( "Error validating the model: " + e.getMessage(), e );
+ }
+ catch ( SAXException e )
+ {
+ throw new ParseException( "Error validating the model: " + e.getMessage(), e );
+ }
+ }
+
+ /**
+ * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>, set on every call even if the reader is reused.
+ * @return the validating xmlReader instance, created and configured on the first call.
+ * @throws SAXException if any
+ */
+ private XMLReader getXmlReader( boolean hasDtdAndXsd )
+ throws SAXException
+ {
+ if ( xmlReader == null )
+ {
+ MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() );
+
+ xmlReader = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" );
+ xmlReader.setFeature( "http://xml.org/sax/features/validation", true );
+ xmlReader.setFeature( "http://apache.org/xml/features/validation/schema", true );
+ xmlReader.setErrorHandler( errorHandler );
+ xmlReader.setEntityResolver( new CachedFileEntityResolver() );
+ }
+
+ ( (MessagesErrorHandler) xmlReader.getErrorHandler() ).setHasDtdAndXsd( hasDtdAndXsd );
+
+ return xmlReader;
+ }
+
+ private Log getLog()
+ {
+ return logger;
+ }
+
+ /**
+ * Convenience class to beautify <code>SAXParseException</code> messages: warnings are logged, others rethrown.
+ */
+ static class MessagesErrorHandler
+ extends DefaultHandler
+ {
+ private static final int TYPE_UNKNOWN = 0;
+
+ private static final int TYPE_WARNING = 1;
+
+ private static final int TYPE_ERROR = 2;
+
+ private static final int TYPE_FATAL = 3;
+
+ private static final String EOL = XmlMarkup.EOL;
+
+ /** @see org/apache/xerces/impl/msg/XMLMessages.properties#MSG_ELEMENT_NOT_DECLARED */
+ private static final Pattern ELEMENT_TYPE_PATTERN =
+ Pattern.compile( "Element type \".*\" must be declared.", Pattern.DOTALL );
+
+ private final Log log;
+
+ private boolean hasDtdAndXsd;
+
+ public MessagesErrorHandler( Log log )
+ {
+ this.log = log;
+ }
+
+ /**
+ * @param hasDtdAndXsd true if the content has both a doctype and an xsd, see the workaround in error()
+ */
+ protected void setHasDtdAndXsd( boolean hasDtdAndXsd )
+ {
+ this.hasDtdAndXsd = hasDtdAndXsd;
+ }
+
+ /** {@inheritDoc} */
+ public void warning( SAXParseException e )
+ throws SAXException
+ {
+ processException( TYPE_WARNING, e );
+ }
+
+ /** {@inheritDoc} */
+ public void error( SAXParseException e )
+ throws SAXException
+ {
+ // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities
+ // like &nbsp;
+ // See http://xsd.stylusstudio.com/2001Nov/post08021.htm
+ if ( !hasDtdAndXsd )
+ {
+ processException( TYPE_ERROR, e );
+ return;
+ }
+
+ Matcher m = ELEMENT_TYPE_PATTERN.matcher( e.getMessage() );
+ if ( !m.find() )
+ {
+ processException( TYPE_ERROR, e );
+ }
+ }
+
+ /** {@inheritDoc} */
+ public void fatalError( SAXParseException e )
+ throws SAXException
+ {
+ processException( TYPE_FATAL, e );
+ }
+
+ private void processException( int type, SAXParseException e )
+ throws SAXException
+ {
+ StringBuffer message = new StringBuffer();
+
+ switch ( type )
+ {
+ case TYPE_WARNING:
+ message.append( "Warning:" );
+ break;
+
+ case TYPE_ERROR:
+ message.append( "Error:" );
+ break;
+
+ case TYPE_FATAL:
+ message.append( "Fatal error:" );
+ break;
+
+ case TYPE_UNKNOWN:
+ default:
+ message.append( "Unknown:" );
+ break;
+ }
+
+ message.append( EOL );
+ message.append( " Public ID: " + e.getPublicId() ).append( EOL );
+ message.append( " System ID: " + e.getSystemId() ).append( EOL );
+ message.append( " Line number: " + e.getLineNumber() ).append( EOL );
+ message.append( " Column number: " + e.getColumnNumber() ).append( EOL );
+ message.append( " Message: " + e.getMessage() ).append( EOL );
+
+ final String logMessage = message.toString();
+
+ switch ( type )
+ {
+ case TYPE_WARNING:
+ log.warn( logMessage );
+ break;
+
+ case TYPE_UNKNOWN:
+ case TYPE_ERROR:
+ case TYPE_FATAL:
+ default:
+ throw new SAXException( logMessage );
+ }
+ }
+ }
+}