You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@apr.apache.org by ni...@apache.org on 2011/03/23 17:01:12 UTC

svn commit: r1084621 - in /apr/apr/trunk/xml: apr_xml.c apr_xml_expat.c apr_xml_internal.h apr_xml_libxml2.c

Author: niq
Date: Wed Mar 23 16:01:12 2011
New Revision: 1084621

URL: http://svn.apache.org/viewvc?rev=1084621&view=rev
Log:
Decouple apr_xml from reliance on Expat

Build with expat and it's effectively unchanged.
The alternative build with libxml2 is compatible to the point
that it passes the test suite, but shouldn't be considered
ready for primetime!

Various hacks want sorting: this is proof-of-concept.

This requires a compile-time choice.  Runtime would be nice,
round tuits permitting.

Build hacks & docs TBD

Added:
    apr/apr/trunk/xml/apr_xml_expat.c
    apr/apr/trunk/xml/apr_xml_internal.h
    apr/apr/trunk/xml/apr_xml_libxml2.c
Modified:
    apr/apr/trunk/xml/apr_xml.c

Modified: apr/apr/trunk/xml/apr_xml.c
URL: http://svn.apache.org/viewvc/apr/apr/trunk/xml/apr_xml.c?rev=1084621&r1=1084620&r2=1084621&view=diff
==============================================================================
--- apr/apr/trunk/xml/apr_xml.c (original)
+++ apr/apr/trunk/xml/apr_xml.c Wed Mar 23 16:01:12 2011
@@ -23,22 +23,15 @@
 #include "apr_want.h"
 
 #include "apr_xml.h"
-
-#if defined(HAVE_XMLPARSE_XMLPARSE_H)
-#include <xmlparse/xmlparse.h>
-#elif defined(HAVE_XMLTOK_XMLPARSE_H)
-#include <xmltok/xmlparse.h>
-#elif defined(HAVE_XML_XMLPARSE_H)
-#include <xml/xmlparse.h>
-#else
-#include <expat.h>
-#endif
+typedef void* XML_Parser;
+typedef int XML_Error;
+typedef unsigned char XML_Char;
+#include "apr_xml_internal.h"
 
 #define DEBUG_CR "\r\n"
 
 static const char APR_KW_xmlns[] = { 0x78, 0x6D, 0x6C, 0x6E, 0x73, '\0' };
 static const char APR_KW_xmlns_lang[] = { 0x78, 0x6D, 0x6C, 0x3A, 0x6C, 0x61, 0x6E, 0x67, '\0' };
-static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };
 
 /* errors related to namespace processing */
 #define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
@@ -51,21 +44,6 @@ static const char APR_KW_DAV[] = { 0x44,
           (name[2] == 0x4C || name[2] == 0x6C) )
 
 
-/* the real (internal) definition of the parser context */
-struct apr_xml_parser {
-    apr_xml_doc *doc;           /* the doc we're parsing */
-    apr_pool_t *p;              /* the pool we allocate from */
-    apr_xml_elem *cur_elem;     /* current element */
-
-    int error;                  /* an error has occurred */
-#define APR_XML_ERROR_EXPAT             1
-#define APR_XML_ERROR_PARSE_DONE        2
-/* also: public APR_XML_NS_ERROR_* values (if any) */
-
-    XML_Parser xp;              /* the actual (Expat) XML parser */
-    enum XML_Error xp_err;      /* stored Expat error code */
-};
-
 /* struct for scoping namespace declarations */
 typedef struct apr_xml_ns_scope {
     const char *prefix;         /* prefix used for this ns */
@@ -139,7 +117,7 @@ static void start_handler(void *userdata
     elem->name = elem_name = apr_pstrdup(parser->p, name);
 
     /* fill in the attributes (note: ends up in reverse order) */
-    while (*attrs) {
+    while (attrs && *attrs) {
         attr = apr_palloc(parser->p, sizeof(*attr));
         attr->name = apr_pstrdup(parser->p, *attrs++);
         attr->value = apr_pstrdup(parser->p, *attrs++);
@@ -336,111 +314,26 @@ static void cdata_handler(void *userdata
     apr_text_append(parser->p, hdr, s);
 }
 
-static apr_status_t cleanup_parser(void *ctx)
-{
-    apr_xml_parser *parser = ctx;
-
-    XML_ParserFree(parser->xp);
-    parser->xp = NULL;
-
-    return APR_SUCCESS;
-}
-
-#if XML_MAJOR_VERSION > 1
-/* Stop the parser if an entity declaration is hit. */
-static void entity_declaration(void *userData, const XML_Char *entityName,
-                               int is_parameter_entity, const XML_Char *value,
-                               int value_length, const XML_Char *base,
-                               const XML_Char *systemId, const XML_Char *publicId,
-                               const XML_Char *notationName)
-{
-    apr_xml_parser *parser = userData;
-
-    XML_StopParser(parser->xp, XML_FALSE);
-}
-#else
-/* A noop default_handler. */
-static void default_handler(void *userData, const XML_Char *s, int len)
-{
-}
-#endif
-
 APR_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
 {
-    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
-
-    parser->p = pool;
-    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
-
-    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
-
-    /* ### is there a way to avoid hard-coding this? */
-    apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
-
-    parser->xp = XML_ParserCreate(NULL);
-    if (parser->xp == NULL) {
-        (*apr_pool_abort_get(pool))(APR_ENOMEM);
-        return NULL;
-    }
-
-    apr_pool_cleanup_register(pool, parser, cleanup_parser,
-                              apr_pool_cleanup_null);
-
-    XML_SetUserData(parser->xp, parser);
-    XML_SetElementHandler(parser->xp, start_handler, end_handler);
-    XML_SetCharacterDataHandler(parser->xp, cdata_handler);
-
-    /* Prevent the "billion laughs" attack against expat by disabling
-     * internal entity expansion.  With 2.x, forcibly stop the parser
-     * if an entity is declared - this is safer and a more obvious
-     * failure mode.  With older versions, installing a noop
-     * DefaultHandler means that internal entities will be expanded as
-     * the empty string, which is also sufficient to prevent the
-     * attack. */
-#if XML_MAJOR_VERSION > 1
-    XML_SetEntityDeclHandler(parser->xp, entity_declaration);
-#else
-    XML_SetDefaultHandler(parser->xp, default_handler);
-#endif
-
-    return parser;
-}
-
-static apr_status_t do_parse(apr_xml_parser *parser,
-                             const char *data, apr_size_t len,
-                             int is_final)
-{
-    if (parser->xp == NULL) {
-        parser->error = APR_XML_ERROR_PARSE_DONE;
-    }
-    else {
-        int rv = XML_Parse(parser->xp, data, (int)len, is_final);
-
-        if (rv == 0) {
-            parser->error = APR_XML_ERROR_EXPAT;
-            parser->xp_err = XML_GetErrorCode(parser->xp);
-        }
-    }
-
-    /* ### better error code? */
-    return parser->error ? APR_EGENERAL : APR_SUCCESS;
+    return apr_xml_parser_create_ex(pool, &start_handler, &end_handler, &cdata_handler);
 }
 
 APR_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
                                               const char *data,
                                               apr_size_t len)
 {
-    return do_parse(parser, data, len, 0 /* is_final */);
+    return parser->impl->Parse(parser, data, len, 0 /* is_final */);
 }
 
 APR_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
                                               apr_xml_doc **pdoc)
 {
     char end;
-    apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */);
+    apr_status_t status = parser->impl->Parse(parser, &end, 0, 1 /* is_final */);
 
     /* get rid of the parser */
-    (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);
+    (void) apr_pool_cleanup_run(parser->p, parser, parser->impl->cleanup);
 
     if (status)
         return status;
@@ -476,7 +369,7 @@ APR_DECLARE(char *) apr_xml_parser_geter
     case APR_XML_ERROR_EXPAT:
         (void) apr_snprintf(errbuf, errbufsize,
                             "XML parser error code: %s (%d)",
-                            XML_ErrorString(parser->xp_err), parser->xp_err);
+                            parser->xp_msg, parser->xp_err);
         return errbuf;
 
     case APR_XML_ERROR_PARSE_DONE:

Added: apr/apr/trunk/xml/apr_xml_expat.c
URL: http://svn.apache.org/viewvc/apr/apr/trunk/xml/apr_xml_expat.c?rev=1084621&view=auto
==============================================================================
--- apr/apr/trunk/xml/apr_xml_expat.c (added)
+++ apr/apr/trunk/xml/apr_xml_expat.c Wed Mar 23 16:01:12 2011
@@ -0,0 +1,134 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "apr.h"
+#include "apr_xml.h"
+
+#if defined(HAVE_XMLPARSE_XMLPARSE_H)
+#include <xmlparse/xmlparse.h>
+#elif defined(HAVE_XMLTOK_XMLPARSE_H)
+#include <xmltok/xmlparse.h>
+#elif defined(HAVE_XML_XMLPARSE_H)
+#include <xml/xmlparse.h>
+#else
+#include <expat.h>
+#endif
+
+/* apr_xml_internal.h uses the bare name XML_Error.  The typedef must come
+ * after the Expat header: referring to "enum XML_Error" before the enum is
+ * defined is a forward reference to an enum, which ISO C does not allow
+ * (some compilers accept it as an extension, others reject it). */
+typedef enum XML_Error XML_Error;
+
+
+#include "apr_xml_internal.h"
+
+/*
+ * Pool cleanup callback for the Expat backend.
+ *
+ * ctx is the apr_xml_parser.  Frees its Expat parser and clears the
+ * handle, which is how do_parse() recognises a finished parser.
+ * Always returns APR_SUCCESS.
+ */
+static apr_status_t cleanup_parser(void *ctx)
+{
+    apr_xml_parser *self = (apr_xml_parser *)ctx;
+    XML_Parser handle = self->xp;
+
+    XML_ParserFree(handle);
+    self->xp = NULL;
+
+    return APR_SUCCESS;
+}
+
+/*
+ * Pass one buffer to Expat.
+ *
+ * parser   - parser context; a NULL parser->xp means cleanup already ran
+ * data,len - the bytes to parse (len is narrowed to int for Expat)
+ * is_final - non-zero on the last call for this document
+ *
+ * Records APR_XML_ERROR_PARSE_DONE when the Expat parser is gone, or
+ * APR_XML_ERROR_EXPAT plus Expat's code and message when XML_Parse()
+ * reports failure.  Returns APR_EGENERAL whenever parser->error is
+ * non-zero on exit, otherwise APR_SUCCESS.
+ */
+static apr_status_t do_parse(apr_xml_parser *parser,
+                             const char *data, apr_size_t len,
+                             int is_final)
+{
+    if (parser->xp != NULL) {
+        if (XML_Parse(parser->xp, data, (int)len, is_final) == 0) {
+            parser->error = APR_XML_ERROR_EXPAT;
+            parser->xp_err = XML_GetErrorCode(parser->xp);
+            parser->xp_msg = XML_ErrorString(parser->xp_err);
+        }
+    }
+    else {
+        parser->error = APR_XML_ERROR_PARSE_DONE;
+    }
+
+    /* ### better error code? */
+    if (parser->error != 0) {
+        return APR_EGENERAL;
+    }
+    return APR_SUCCESS;
+}
+
+
+/* Entry points of the Expat backend, in XMLParserImpl field order:
+ * Parse, then cleanup. */
+static XMLParserImpl xml_parser_expat = {
+    do_parse,
+    cleanup_parser
+};
+
+/* Returns the Expat backend's entry-point table. */
+XMLParserImpl* apr_xml_get_parser_impl(void) { return &xml_parser_expat; }
+/* The string "DAV:" written as ASCII code points.
+ * NOTE(review): presumably spelled numerically so the value does not depend
+ * on the compiler's execution character set - confirm. */
+static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };
+
+#if XML_MAJOR_VERSION > 1
+/*
+ * Entity-declaration callback (Expat major version 2 and up): stop the
+ * parser the moment any entity is declared.  userData is the
+ * apr_xml_parser; every other argument is ignored.
+ */
+static void entity_declaration(void *userData, const XML_Char *entityName,
+                               int is_parameter_entity, const XML_Char *value,
+                               int value_length, const XML_Char *base,
+                               const XML_Char *systemId, const XML_Char *publicId,
+                               const XML_Char *notationName)
+{
+    XML_Parser expat = ((apr_xml_parser *)userData)->xp;
+
+    XML_StopParser(expat, XML_FALSE);
+}
+#else
+/* Default handler for older Expat: accepts the data and does nothing. */
+static void default_handler(void *userData, const XML_Char *s, int len)
+{
+    /* intentionally empty */
+}
+#endif
+
+/*
+ * Create an apr_xml_parser backed by Expat.
+ *
+ * pool       - pool the parser, its document and its namespace array are
+ *              allocated from; a cleanup that frees the Expat parser is
+ *              registered on it
+ * start_func - start-element callback, installed as Expat's
+ *              XML_StartElementHandler
+ * end_func   - end-element callback, installed as XML_EndElementHandler
+ * cdata_func - character-data callback, installed as
+ *              XML_CharacterDataHandler
+ *
+ * The callbacks receive the apr_xml_parser as their user data.
+ *
+ * Returns the new parser, or NULL if Expat could not create one (after
+ * calling the pool's abort function, when the pool has one).
+ */
+APR_DECLARE(apr_xml_parser *) apr_xml_parser_create_ex(apr_pool_t *pool,
+    void *start_func, void *end_func, void *cdata_func)
+{
+    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
+
+    parser->impl = apr_xml_get_parser_impl();
+
+    parser->p = pool;
+    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
+
+    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
+
+    /* ### is there a way to avoid hard-coding this? */
+    apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
+
+    parser->xp = XML_ParserCreate(NULL);
+    if (parser->xp == NULL) {
+        /* A pool need not have an abort function; calling through a NULL
+         * pointer would crash instead of reporting the failure. */
+        apr_abortfunc_t abort_fn = apr_pool_abort_get(pool);
+
+        if (abort_fn != NULL) {
+            (void) abort_fn(APR_ENOMEM);
+        }
+        return NULL;
+    }
+
+    apr_pool_cleanup_register(pool, parser, cleanup_parser,
+                              apr_pool_cleanup_null);
+
+    XML_SetUserData(parser->xp, parser);
+    /* The callbacks arrive as void*; convert them explicitly, since C has
+     * no implicit conversion from void* to a function pointer. */
+    XML_SetElementHandler(parser->xp,
+                          (XML_StartElementHandler) start_func,
+                          (XML_EndElementHandler) end_func);
+    XML_SetCharacterDataHandler(parser->xp,
+                                (XML_CharacterDataHandler) cdata_func);
+
+    /* Prevent the "billion laughs" attack against expat by disabling
+     * internal entity expansion.  With 2.x, forcibly stop the parser
+     * if an entity is declared - this is safer and a more obvious
+     * failure mode.  With older versions, installing a noop
+     * DefaultHandler means that internal entities will be expanded as
+     * the empty string, which is also sufficient to prevent the
+     * attack. */
+#if XML_MAJOR_VERSION > 1
+    XML_SetEntityDeclHandler(parser->xp, entity_declaration);
+#else
+    XML_SetDefaultHandler(parser->xp, default_handler);
+#endif
+
+    return parser;
+}

Added: apr/apr/trunk/xml/apr_xml_internal.h
URL: http://svn.apache.org/viewvc/apr/apr/trunk/xml/apr_xml_internal.h?rev=1084621&view=auto
==============================================================================
--- apr/apr/trunk/xml/apr_xml_internal.h (added)
+++ apr/apr/trunk/xml/apr_xml_internal.h Wed Mar 23 16:01:12 2011
@@ -0,0 +1,50 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef APR_XML_INTERNAL_H
+#define APR_XML_INTERNAL_H
+
+
+/* Table of backend entry points.  Each backend source file provides one
+ * instance and returns it from apr_xml_get_parser_impl(). */
+struct XMLParserImpl {
+    /* Parse a buffer: (parser, data, length, is_final). */
+    apr_status_t (*Parse)(apr_xml_parser*, const char*, apr_size_t, int);
+    /* Pool cleanup callback; it is handed the apr_xml_parser and releases
+     * the backend parser. */
+    apr_status_t (*cleanup)(void*);
+};
+typedef struct XMLParserImpl XMLParserImpl;
+/* Returns the entry-point table of the backend compiled into this build. */
+XMLParserImpl* apr_xml_get_parser_impl(void);
+
+
+/* the real (internal) definition of the parser context.
+ * XML_Parser and XML_Error are not defined here: each source file that
+ * includes this header defines them first. */
+struct apr_xml_parser {
+    apr_xml_doc *doc;           /* the doc we're parsing */
+    apr_pool_t *p;              /* the pool we allocate from */
+    apr_xml_elem *cur_elem;     /* current element */
+
+    int error;                  /* an error has occurred */
+/* despite its name, APR_XML_ERROR_EXPAT is also what the libxml2 backend
+ * stores for a parse failure */
+#define APR_XML_ERROR_EXPAT             1
+#define APR_XML_ERROR_PARSE_DONE        2
+/* also: public APR_XML_NS_ERROR_* values (if any) */
+
+    XML_Parser xp;              /* the backend's parser handle; set to NULL
+                                 * by the backend's cleanup */
+    XML_Error xp_err;      /* stored backend error code */
+    const char *xp_msg;         /* message text for xp_err, set by the backend */
+    XMLParserImpl *impl;        /* entry points of the backend in use */
+};
+
+
+
+/* Backend constructor: builds a parser whose start-element, end-element
+ * and character-data callbacks are the three void* arguments, in that
+ * order.  Declared with APR_DECLARE because both backend definitions use
+ * it; the prototype and the definition must agree on linkage and calling
+ * convention. */
+APR_DECLARE(apr_xml_parser *) apr_xml_parser_create_ex(apr_pool_t *pool,
+    void *start_func, void *end_func, void *cdata_func);
+
+#endif

Added: apr/apr/trunk/xml/apr_xml_libxml2.c
URL: http://svn.apache.org/viewvc/apr/apr/trunk/xml/apr_xml_libxml2.c?rev=1084621&view=auto
==============================================================================
--- apr/apr/trunk/xml/apr_xml_libxml2.c (added)
+++ apr/apr/trunk/xml/apr_xml_libxml2.c Wed Mar 23 16:01:12 2011
@@ -0,0 +1,96 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "apr.h"
+#include "apr_xml.h"
+
+#include <libxml/parser.h>
+#include <libxml/xmlerror.h>
+
+/* apr_xml_internal.h is written in terms of XML_Parser and XML_Error;
+ * map those names onto the libxml2 types before including it.
+ * NOTE(review): apr_xml.c defines the same names as void* and int, so
+ * struct apr_xml_parser only has one layout across files if these types
+ * have matching size and alignment - confirm. */
+typedef xmlParserCtxtPtr XML_Parser;
+typedef xmlParserErrors XML_Error;
+
+#include "apr_xml_internal.h"
+
+/*
+ * Pool cleanup callback for the libxml2 backend.
+ *
+ * ctx is the apr_xml_parser.  Frees its libxml2 parser context and
+ * clears the handle.  Always returns APR_SUCCESS.
+ */
+static apr_status_t cleanup_parser(void *ctx)
+{
+    apr_xml_parser *self = (apr_xml_parser *)ctx;
+    XML_Parser handle = self->xp;
+
+    xmlFreeParserCtxt(handle);
+    self->xp = NULL;
+
+    return APR_SUCCESS;
+}
+
+/*
+ * Feed one chunk of input to the libxml2 push parser.
+ *
+ * parser - parser context; parser->xp is NULL once cleanup has run
+ * data   - bytes to parse
+ * sz     - number of bytes at data (narrowed to int for libxml2)
+ * final  - non-zero on the last call, to terminate the document
+ *
+ * On a libxml2 failure the error number is kept in parser->xp_err and its
+ * text in parser->xp_msg.  Returns APR_EGENERAL whenever parser->error is
+ * non-zero on exit and APR_SUCCESS otherwise, which is the same contract
+ * as the Expat backend's do_parse().
+ */
+static apr_status_t libxml2_parse(apr_xml_parser* parser, const char* data,
+                                  apr_size_t sz, int final)
+{
+    if (parser->xp == NULL) {
+        /* The context has already been freed; there is nothing to parse
+         * with and no error record to read. */
+        parser->error = APR_XML_ERROR_PARSE_DONE;
+    }
+    else {
+        parser->xp_err = xmlParseChunk(parser->xp, data, (int)sz, final);
+        if (parser->xp_err != 0) {
+            xmlErrorPtr errptr = xmlCtxtGetLastError(parser->xp);
+
+            /* libxml2 may have no error record (or no text) to hand back */
+            parser->xp_msg = (errptr != NULL && errptr->message != NULL)
+                             ? errptr->message
+                             : "unknown libxml2 parser error";
+            /* this misnomer is used as a test for (any) parser error. */
+            parser->error = APR_XML_ERROR_EXPAT;
+        }
+    }
+
+    /* Callers treat the result as an apr_status_t, so do not leak the raw
+     * libxml2 error number; also report an error recorded in parser->error
+     * by anything other than xmlParseChunk(). */
+    return parser->error ? APR_EGENERAL : APR_SUCCESS;
+}
+/* Entry points of the libxml2 backend, in XMLParserImpl field order:
+ * Parse, then cleanup. */
+static XMLParserImpl xml_parser_libxml2 = {
+    libxml2_parse,
+    cleanup_parser
+};
+
+/* The string "DAV:" written as ASCII code points.
+ * NOTE(review): presumably spelled numerically so the value does not depend
+ * on the compiler's execution character set - confirm. */
+static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };
+
+/* Returns the libxml2 backend's entry-point table. */
+XMLParserImpl* apr_xml_get_parser_impl(void)
+{
+    return &xml_parser_libxml2;
+}
+
+
+/*
+ * Create an apr_xml_parser backed by a libxml2 push parser.
+ *
+ * pool       - pool the parser, its SAX handler table, its document and
+ *              its namespace array are allocated from; a cleanup that
+ *              frees the libxml2 context is registered on it
+ * start_func - start-element callback, stored as the SAX startElement
+ * end_func   - end-element callback, stored as the SAX endElement
+ * cdata_func - character-data callback, stored as the SAX characters
+ *
+ * The callbacks receive the apr_xml_parser as their user data.
+ *
+ * Returns the new parser, or NULL if libxml2 could not create a context
+ * (after calling the pool's abort function, when the pool has one).
+ *
+ * NOTE(review): unlike the Expat backend, nothing here stops or neuters
+ * entity declarations - confirm whether entity-expansion attacks need to
+ * be handled for this backend.
+ */
+APR_DECLARE(apr_xml_parser *) apr_xml_parser_create_ex(apr_pool_t *pool,
+    void *start_func, void *end_func, void *cdata_func)
+{
+    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
+    /* FIXME: This is a mismatch.  We should create a single global
+     * sax instance and re-use it for every parser.  That means we
+     * need an up-front initialisation function.
+     */
+    xmlSAXHandlerPtr sax = apr_pcalloc(pool, sizeof(xmlSAXHandler));
+    /* The callbacks arrive as void*; convert them explicitly, since C has
+     * no implicit conversion from void* to a function pointer. */
+    sax->startElement = (startElementSAXFunc) start_func;
+    sax->endElement = (endElementSAXFunc) end_func;
+    sax->characters = (charactersSAXFunc) cdata_func;
+    sax->initialized = 1;
+
+    parser->impl = apr_xml_get_parser_impl();
+
+    parser->p = pool;
+    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
+
+    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
+
+    /* ### is there a way to avoid hard-coding this? */
+    apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
+
+    parser->xp = xmlCreatePushParserCtxt(sax, parser, NULL, 0, NULL);
+    if (parser->xp == NULL) {
+        /* A pool need not have an abort function; calling through a NULL
+         * pointer would crash instead of reporting the failure. */
+        apr_abortfunc_t abort_fn = apr_pool_abort_get(pool);
+
+        if (abort_fn != NULL) {
+            (void) abort_fn(APR_ENOMEM);
+        }
+        return NULL;
+    }
+
+    apr_pool_cleanup_register(pool, parser, cleanup_parser,
+                              apr_pool_cleanup_null);
+
+    return parser;
+}



Re: svn commit: r1084621 - in /apr/apr/trunk/xml: apr_xml.c apr_xml_expat.c apr_xml_internal.h apr_xml_libxml2.c

Posted by Guenter Knauf <fu...@apache.org>.
Am 23.03.2011 18:53, schrieb Jeff Trawick:
> On Wed, Mar 23, 2011 at 12:01 PM,<ni...@apache.org>  wrote:
>> Author: niq
>> Date: Wed Mar 23 16:01:12 2011
>> New Revision: 1084621
>>
>> URL: http://svn.apache.org/viewvc?rev=1084621&view=rev
>> Log:
>> Decouple apr_xml from reliance on Expat
>>
>> Build with expat and it's effectively unchanged.
>> The alternative build with libxml2 is compatible to the point
>> that it passes the test suite, but shouldn't be considered
>> ready for primetime!
>
> how does one build only with expat?
>
> without libxml2 dev files, my build starts failing with
>
> xml/apr_xml_libxml2.c:20:27: error: libxml/parser.h: No such file or directory
>
and I get:
Building D:/projects/svn-httpd/apr-head/apr/xml
Creating D:\projects\svn-httpd\apr-head\..\srclib\expat-2.0.1/lib/expat_config.h
GEN  obj_release/xml_cc.opt
CC   apr_xml.c
CC   apr_xml_expat.c
### mwccnlm Compiler:
#    File: apr_xml_expat.c
# ------------------------
#      20: typedef enum XML_Error XML_Error;
#   Error:                        ^^^^^^^^^
#   undefined identifier 'XML_Error'

Gün.




Re: svn commit: r1084621 - in /apr/apr/trunk/xml: apr_xml.c apr_xml_expat.c apr_xml_internal.h apr_xml_libxml2.c

Posted by Nick Kew <ni...@apache.org>.
On 23 Mar 2011, at 17:53, Jeff Trawick wrote:

> On Wed, Mar 23, 2011 at 12:01 PM,  <ni...@apache.org> wrote:
>> Author: niq
>> Date: Wed Mar 23 16:01:12 2011
>> New Revision: 1084621
>> 
>> URL: http://svn.apache.org/viewvc?rev=1084621&view=rev
>> Log:
>> Decouple apr_xml from reliance on Expat
>> 
>> Build with expat and it's effectively unchanged.
>> The alternative build with libxml2 is compatible to the point
>> that it passes the test suite, but shouldn't be considered
>> ready for primetime!
> 
> how does one build only with expat?

Build TBD.  I guess the easiest fix will be a configure option that
#ifdefs out whichever implementation is not selected.

My builds to date have involved hacking the make.

> without libxml2 dev files, my build starts failing with
> 
> xml/apr_xml_libxml2.c:20:27: error: libxml/parser.h: No such file or directory

-I/usr/include/libxml2

There may be other default places to look.

-- 
Nick Kew

Available for work, contract or permanent
http://www.webthing.com/~nick/cv.html


Re: svn commit: r1084621 - in /apr/apr/trunk/xml: apr_xml.c apr_xml_expat.c apr_xml_internal.h apr_xml_libxml2.c

Posted by Jeff Trawick <tr...@gmail.com>.
On Wed, Mar 23, 2011 at 12:01 PM,  <ni...@apache.org> wrote:
> Author: niq
> Date: Wed Mar 23 16:01:12 2011
> New Revision: 1084621
>
> URL: http://svn.apache.org/viewvc?rev=1084621&view=rev
> Log:
> Decouple apr_xml from reliance on Expat
>
> Build with expat and it's effectively unchanged.
> The alternative build with libxml2 is compatible to the point
> that it passes the test suite, but shouldn't be considered
> ready for primetime!

how does one build only with expat?

without libxml2 dev files, my build starts failing with

xml/apr_xml_libxml2.c:20:27: error: libxml/parser.h: No such file or directory