You are viewing a plain text version of this content. The canonical link for it is here.
Posted to doxia-commits@maven.apache.org by lt...@apache.org on 2010/05/07 11:47:55 UTC

svn commit: r942043 - in /maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia: parser/AbstractXmlParser.java util/XmlValidator.java

Author: ltheussl
Date: Fri May  7 09:47:55 2010
New Revision: 942043

URL: http://svn.apache.org/viewvc?rev=942043&view=rev
Log:
extract validation routine into a separate class

Added:
    maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java
Modified:
    maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java

Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java?rev=942043&r1=942042&r2=942043&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java (original)
+++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java Fri May  7 09:47:55 2010
@@ -37,20 +37,19 @@ import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import javax.xml.XMLConstants;
-
 import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
 import org.apache.commons.httpclient.HttpClient;
 import org.apache.commons.httpclient.HttpException;
 import org.apache.commons.httpclient.HttpStatus;
 import org.apache.commons.httpclient.methods.GetMethod;
 import org.apache.commons.httpclient.params.HttpMethodParams;
-import org.apache.maven.doxia.logging.Log;
+
 import org.apache.maven.doxia.macro.MacroExecutionException;
 import org.apache.maven.doxia.markup.XmlMarkup;
 import org.apache.maven.doxia.sink.Sink;
 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
 import org.apache.maven.doxia.util.HtmlTools;
+import org.apache.maven.doxia.util.XmlValidator;
 
 import org.codehaus.plexus.util.FileUtils;
 import org.codehaus.plexus.util.IOUtil;
@@ -62,10 +61,6 @@ import org.codehaus.plexus.util.xml.pull
 import org.xml.sax.EntityResolver;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
-import org.xml.sax.SAXParseException;
-import org.xml.sax.XMLReader;
-import org.xml.sax.helpers.DefaultHandler;
-import org.xml.sax.helpers.XMLReaderFactory;
 
 /**
  * An abstract class that defines some convenience methods for <code>XML</code> parsers.
@@ -96,15 +91,6 @@ public abstract class AbstractXmlParser
     private static final Pattern PATTERN_ENTITY_2 =
         Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
 
-    /**
-     * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*"
-     * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>.
-     */
-    private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*" + DOCTYPE_START + "([^>]*)>.*" );
-
-    /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/#NT-Name */
-    private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" );
-
     private boolean ignorableWhitespace;
 
     private boolean collapsibleWhitespace;
@@ -115,9 +101,6 @@ public abstract class AbstractXmlParser
 
     private boolean validate = true;
 
-    /** lazy xmlReader to validate xml content*/
-    private XMLReader xmlReader;
-
     /** {@inheritDoc} */
     public void parse( Reader source, Sink sink )
         throws ParseException
@@ -137,7 +120,7 @@ public abstract class AbstractXmlParser
                 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
             }
 
-            validate( content );
+            new XmlValidator( getLog() ).validate( content );
 
             source = new StringReader( content );
         }
@@ -592,82 +575,6 @@ public abstract class AbstractXmlParser
     // ----------------------------------------------------------------------
 
     /**
-     * Validate an XML content with SAX.
-     *
-     * @param content a not null xml content
-     * @throws ParseException if any.
-     */
-    private void validate( String content )
-        throws ParseException
-    {
-        try
-        {
-            // 1 if there's a doctype
-            boolean hasDoctype = false;
-            Matcher matcher = PATTERN_DOCTYPE.matcher( content );
-            if ( matcher.find() )
-            {
-                hasDoctype = true;
-            }
-
-            // 2 check for an xmlns instance
-            boolean hasXsd = false;
-            matcher = PATTERN_TAG.matcher( content );
-            if ( matcher.find() )
-            {
-                String value = matcher.group( 2 );
-
-                if ( value.indexOf( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) != -1 )
-                {
-                    hasXsd = true;
-                }
-            }
-
-            // 3 validate content if doctype or xsd
-            if ( hasDoctype || hasXsd )
-            {
-                if ( getLog().isDebugEnabled() )
-                {
-                    getLog().debug( "Validating the content..." );
-                }
-                getXmlReader( hasXsd && hasDoctype ).parse( new InputSource( new StringReader( content ) ) );
-            }
-        }
-        catch ( IOException e )
-        {
-            throw new ParseException( "Error validating the model: " + e.getMessage(), e );
-        }
-        catch ( SAXException e )
-        {
-            throw new ParseException( "Error validating the model: " + e.getMessage(), e );
-        }
-    }
-
-    /**
-     * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>.
-     * @return an xmlReader instance.
-     * @throws SAXException if any
-     */
-    private XMLReader getXmlReader( boolean hasDtdAndXsd )
-        throws SAXException
-    {
-        if ( xmlReader == null )
-        {
-            MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() );
-
-            xmlReader = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" );
-            xmlReader.setFeature( "http://xml.org/sax/features/validation", true );
-            xmlReader.setFeature( "http://apache.org/xml/features/validation/schema", true );
-            xmlReader.setErrorHandler( errorHandler );
-            xmlReader.setEntityResolver( new CachedFileEntityResolver() );
-        }
-
-        ( (MessagesErrorHandler) xmlReader.getErrorHandler() ).setHasDtdAndXsd( hasDtdAndXsd );
-
-        return xmlReader;
-    }
-
-    /**
      * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
      * <br/>
      * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
@@ -787,125 +694,6 @@ public abstract class AbstractXmlParser
     }
 
     /**
-     * Convenience class to beautify <code>SAXParseException</code> messages.
-     */
-    static class MessagesErrorHandler
-        extends DefaultHandler
-    {
-        private static final int TYPE_UNKNOWN = 0;
-
-        private static final int TYPE_WARNING = 1;
-
-        private static final int TYPE_ERROR = 2;
-
-        private static final int TYPE_FATAL = 3;
-
-        /** @see org/apache/xerces/impl/msg/XMLMessages.properties#MSG_ELEMENT_NOT_DECLARED */
-        private static final Pattern ELEMENT_TYPE_PATTERN =
-            Pattern.compile( "Element type \".*\" must be declared.", Pattern.DOTALL );
-
-        private final Log log;
-
-        private boolean hasDtdAndXsd;
-
-        public MessagesErrorHandler( Log log )
-        {
-            this.log = log;
-        }
-
-        /**
-         * @param hasDtdAndXsd the hasDtdAndXsd to set
-         */
-        protected void setHasDtdAndXsd( boolean hasDtdAndXsd )
-        {
-            this.hasDtdAndXsd = hasDtdAndXsd;
-        }
-
-        /** {@inheritDoc} */
-        public void warning( SAXParseException e )
-            throws SAXException
-        {
-            processException( TYPE_WARNING, e );
-        }
-
-        /** {@inheritDoc} */
-        public void error( SAXParseException e )
-            throws SAXException
-        {
-            // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities
-            // like &nbsp;
-            // See http://xsd.stylusstudio.com/2001Nov/post08021.htm
-            if ( !hasDtdAndXsd )
-            {
-                processException( TYPE_ERROR, e );
-                return;
-            }
-
-            Matcher m = ELEMENT_TYPE_PATTERN.matcher( e.getMessage() );
-            if ( !m.find() )
-            {
-                processException( TYPE_ERROR, e );
-            }
-        }
-
-        /** {@inheritDoc} */
-        public void fatalError( SAXParseException e )
-            throws SAXException
-        {
-            processException( TYPE_FATAL, e );
-        }
-
-        private void processException( int type, SAXParseException e )
-            throws SAXException
-        {
-            StringBuffer message = new StringBuffer();
-
-            switch ( type )
-            {
-                case TYPE_WARNING:
-                    message.append( "Warning:" );
-                    break;
-
-                case TYPE_ERROR:
-                    message.append( "Error:" );
-                    break;
-
-                case TYPE_FATAL:
-                    message.append( "Fatal error:" );
-                    break;
-
-                case TYPE_UNKNOWN:
-                default:
-                    message.append( "Unknown:" );
-                    break;
-            }
-
-            message.append( EOL );
-            message.append( "  Public ID: " + e.getPublicId() ).append( EOL );
-            message.append( "  System ID: " + e.getSystemId() ).append( EOL );
-            message.append( "  Line number: " + e.getLineNumber() ).append( EOL );
-            message.append( "  Column number: " + e.getColumnNumber() ).append( EOL );
-            message.append( "  Message: " + e.getMessage() ).append( EOL );
-
-            switch ( type )
-            {
-                case TYPE_WARNING:
-                    if ( log.isWarnEnabled() )
-                    {
-                        log.warn( message.toString() );
-                    }
-                    break;
-
-                case TYPE_UNKNOWN:
-                case TYPE_ERROR:
-                case TYPE_FATAL:
-                default:
-                    throw new SAXException( message.toString() );
-            }
-        }
-    }
-
-    /**
      * Implementation of the callback mechanism <code>EntityResolver</code>.
      * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
      */

Added: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java?rev=942043&view=auto
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java (added)
+++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java Fri May  7 09:47:55 2010
@@ -0,0 +1,271 @@
+package org.apache.maven.doxia.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.xml.XMLConstants;
+
+import org.apache.maven.doxia.logging.Log;
+import org.apache.maven.doxia.markup.XmlMarkup;
+import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver;
+import org.apache.maven.doxia.parser.ParseException;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.helpers.XMLReaderFactory;
+
+/**
+ * Validates XML content with a SAX parser when it declares a DOCTYPE or references the XML Schema instance namespace.
+ *
+ * @version $Id$
+ * @since 1.1.3
+ */
+public class XmlValidator
+{
+    /**
+     * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*", used to detect whether the content declares a doctype;
+     * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>.
+     */
+    private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*" + XmlMarkup.DOCTYPE_START + "([^>]*)>.*" );
+
+    /** Tag pattern based on http://www.w3.org/TR/REC-xml/#NT-Name; group 2 is the text between the name and '>'. */
+    private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" );
+
+    /** Lazily created XMLReader used to validate XML content, see {@link #getXmlReader(boolean)}. */
+    private XMLReader xmlReader;
+
+    private Log logger;
+
+    /**
+     * Creates a validator that reports through the given logger.
+     *
+     * @param log a logger, not null.
+     */
+    public XmlValidator( Log log )
+    {
+        this.logger = log;
+    }
+
+    /**
+     * Validate an XML content with SAX, but only if it has a doctype or references the XML Schema instance namespace.
+     *
+     * @param content a not null xml content
+     * @throws ParseException wrapping any IOException or SAXException raised while validating.
+     */
+    public void validate( String content )
+        throws ParseException
+    {
+        try
+        {
+            // 1 check if there's a doctype
+            boolean hasDoctype = false;
+            Matcher matcher = PATTERN_DOCTYPE.matcher( content );
+            if ( matcher.find() )
+            {
+                hasDoctype = true;
+            }
+
+            // 2 check the matched tag for the XML Schema instance namespace URI
+            boolean hasXsd = false;
+            matcher = PATTERN_TAG.matcher( content );
+            if ( matcher.find() )
+            {
+                String value = matcher.group( 2 );
+
+                if ( value.indexOf( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) != -1 )
+                {
+                    hasXsd = true;
+                }
+            }
+
+            // 3 validate content if doctype or xsd; the error handler is told when both are present
+            if ( hasDoctype || hasXsd )
+            {
+                getLog().debug( "Validating the content..." );
+                getXmlReader( hasXsd && hasDoctype ).parse( new InputSource( new StringReader( content ) ) );
+            }
+        }
+        catch ( IOException e )
+        {
+            throw new ParseException( "Error validating the model: " + e.getMessage(), e );
+        }
+        catch ( SAXException e )
+        {
+            throw new ParseException( "Error validating the model: " + e.getMessage(), e );
+        }
+    }
+
+    /**
+     * @param hasDtdAndXsd passed on to the <code>MessagesErrorHandler</code> of the reader on every call.
+     * @return the xmlReader instance, created and configured on first use.
+     * @throws SAXException if any
+     */
+    private XMLReader getXmlReader( boolean hasDtdAndXsd )
+        throws SAXException
+    {
+        if ( xmlReader == null )
+        {
+            MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() );
+
+            xmlReader = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" );
+            xmlReader.setFeature( "http://xml.org/sax/features/validation", true );
+            xmlReader.setFeature( "http://apache.org/xml/features/validation/schema", true );
+            xmlReader.setErrorHandler( errorHandler );
+            xmlReader.setEntityResolver( new CachedFileEntityResolver() );
+        }
+
+        ( (MessagesErrorHandler) xmlReader.getErrorHandler() ).setHasDtdAndXsd( hasDtdAndXsd );
+
+        return xmlReader;
+    }
+
+    private Log getLog()
+    {
+        return logger;
+    }
+
+    /**
+     * Convenience class to beautify <code>SAXParseException</code> messages; warnings are logged, the rest is thrown.
+     */
+    static class MessagesErrorHandler
+        extends DefaultHandler
+    {
+        private static final int TYPE_UNKNOWN = 0;
+
+        private static final int TYPE_WARNING = 1;
+
+        private static final int TYPE_ERROR = 2;
+
+        private static final int TYPE_FATAL = 3;
+
+        private static final String EOL = XmlMarkup.EOL;
+
+        /** @see org/apache/xerces/impl/msg/XMLMessages.properties#MSG_ELEMENT_NOT_DECLARED */
+        private static final Pattern ELEMENT_TYPE_PATTERN =
+            Pattern.compile( "Element type \".*\" must be declared.", Pattern.DOTALL );
+
+        private final Log log;
+
+        private boolean hasDtdAndXsd;
+
+        public MessagesErrorHandler( Log log )
+        {
+            this.log = log;
+        }
+
+        /**
+         * @param hasDtdAndXsd true if the content has both a doctype and an xsd, see {@link #error(SAXParseException)}
+         */
+        protected void setHasDtdAndXsd( boolean hasDtdAndXsd )
+        {
+            this.hasDtdAndXsd = hasDtdAndXsd;
+        }
+
+        /** {@inheritDoc} Logs the formatted message as a warning. */
+        public void warning( SAXParseException e )
+            throws SAXException
+        {
+            processException( TYPE_WARNING, e );
+        }
+
+        /** {@inheritDoc} Throws, except for undeclared-element errors when both a doctype and an xsd are present. */
+        public void error( SAXParseException e )
+            throws SAXException
+        {
+            // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities
+            // like &nbsp;
+            // See http://xsd.stylusstudio.com/2001Nov/post08021.htm
+            if ( !hasDtdAndXsd )
+            {
+                processException( TYPE_ERROR, e );
+                return;
+            }
+
+            Matcher m = ELEMENT_TYPE_PATTERN.matcher( e.getMessage() );
+            if ( !m.find() )
+            {
+                processException( TYPE_ERROR, e );
+            }
+        }
+
+        /** {@inheritDoc} Always throws a <code>SAXException</code> carrying the formatted message. */
+        public void fatalError( SAXParseException e )
+            throws SAXException
+        {
+            processException( TYPE_FATAL, e );
+        }
+
+        private void processException( int type, SAXParseException e )
+            throws SAXException
+        {
+            StringBuffer message = new StringBuffer();
+
+            switch ( type )
+            {
+                case TYPE_WARNING:
+                    message.append( "Warning:" );
+                    break;
+
+                case TYPE_ERROR:
+                    message.append( "Error:" );
+                    break;
+
+                case TYPE_FATAL:
+                    message.append( "Fatal error:" );
+                    break;
+
+                case TYPE_UNKNOWN:
+                default:
+                    message.append( "Unknown:" );
+                    break;
+            }
+
+            message.append( EOL );
+            message.append( "  Public ID: " + e.getPublicId() ).append( EOL );
+            message.append( "  System ID: " + e.getSystemId() ).append( EOL );
+            message.append( "  Line number: " + e.getLineNumber() ).append( EOL );
+            message.append( "  Column number: " + e.getColumnNumber() ).append( EOL );
+            message.append( "  Message: " + e.getMessage() ).append( EOL );
+
+            final String logMessage = message.toString();
+
+            switch ( type )
+            {
+                case TYPE_WARNING:
+                    log.warn( logMessage );
+                    break;
+
+                case TYPE_UNKNOWN:
+                case TYPE_ERROR:
+                case TYPE_FATAL:
+                default:
+                    throw new SAXException( logMessage );
+            }
+        }
+    }
+}