You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by ni...@apache.org on 2014/06/01 22:05:44 UTC

svn commit: r1599027 - in /httpd/httpd/trunk: CHANGES docs/manual/mod/mod_proxy_html.xml modules/filters/mod_proxy_html.c

Author: niq
Date: Sun Jun  1 20:05:44 2014
New Revision: 1599027

URL: http://svn.apache.org/r1599027
Log:
mod_proxy_html: support automatic doctype detection.
PR 56285
Patch by Micha Lenk, adapted by niq

Modified:
    httpd/httpd/trunk/CHANGES
    httpd/httpd/trunk/docs/manual/mod/mod_proxy_html.xml
    httpd/httpd/trunk/modules/filters/mod_proxy_html.c

Modified: httpd/httpd/trunk/CHANGES
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/CHANGES?rev=1599027&r1=1599026&r2=1599027&view=diff
==============================================================================
--- httpd/httpd/trunk/CHANGES [utf-8] (original)
+++ httpd/httpd/trunk/CHANGES [utf-8] Sun Jun  1 20:05:44 2014
@@ -1,6 +1,9 @@
                                                          -*- coding: utf-8 -*-
 Changes with Apache 2.5.0
 
+  *) mod_proxy_html: support automatic detection of doctype and processing
+     of FPIs.  PR 56285 [Micha Lenk <micha lenk info>, Nick Kew]
+
   *) mod_proxy_html: skip documents shorter than 4 bytes
      PR 56286 [Micha Lenk <micha lenk info>]
 

Modified: httpd/httpd/trunk/docs/manual/mod/mod_proxy_html.xml
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/docs/manual/mod/mod_proxy_html.xml?rev=1599027&r1=1599026&r2=1599027&view=diff
==============================================================================
--- httpd/httpd/trunk/docs/manual/mod/mod_proxy_html.xml (original)
+++ httpd/httpd/trunk/docs/manual/mod/mod_proxy_html.xml Sun Jun  1 20:05:44 2014
@@ -227,7 +227,10 @@ for earlier 2.x versions</compatibility>
 <name>ProxyHTMLDocType</name>
 <description>Sets an HTML or XHTML document type declaration.</description>
 <syntax>ProxyHTMLDocType <var>HTML|XHTML [Legacy]</var><br/><strong>OR</strong>
-<br/>ProxyHTMLDocType <var>fpi [SGML|XML]</var></syntax>
+<br/>ProxyHTMLDocType <var>fpi [SGML|XML]</var><br/><strong>OR</strong>
+<br/>ProxyHTMLDocType <var>html5</var><br/><strong>OR</strong>
+<br/>ProxyHTMLDocType <var>auto</var></syntax>
+<default>ProxyHTMLDocType auto (2.5/trunk versions); no FPI (2.4.x)</default>
 <contextlist><context>server config</context>
 <context>virtual host</context><context>directory</context>
 </contextlist>
@@ -245,9 +248,9 @@ be necessary if you are proxying pre-199
 authoring/publishing tools.</p>
 <p>In the second form, it will insert your own FPI.  The optional second
 argument determines whether SGML/HTML or XML/XHTML syntax will be used.</p>
-<p>The default is changed to omitting any FPI,
-on the grounds that no FPI is better than a bogus one.  If your backend
-generates decent HTML or XHTML, set it accordingly.</p>
+<p>The third form declares documents as HTML 5.</p>
+<p>The fourth form, new in HTTPD trunk and not yet available in released
+versions, uses libxml2's HTML parser to detect the doctype.</p>
 <p>If the first form is used, mod_proxy_html
 will also clean up the HTML to the specified standard.  It cannot
 fix every error, but it will strip out bogus elements and attributes.

Modified: httpd/httpd/trunk/modules/filters/mod_proxy_html.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/mod_proxy_html.c?rev=1599027&r1=1599026&r2=1599027&view=diff
==============================================================================
--- httpd/httpd/trunk/modules/filters/mod_proxy_html.c (original)
+++ httpd/httpd/trunk/modules/filters/mod_proxy_html.c Sun Jun  1 20:05:44 2014
@@ -108,6 +108,7 @@ typedef struct {
     size_t avail;
     const char *encoding;
     urlmap *map;
+    const char *etag;
 } saxctxt;
 
 
@@ -280,6 +281,33 @@ static void dump_content(saxctxt *ctx)
     }
     AP_fwrite(ctx, ctx->buf, strlen(ctx->buf), 1);
 }
+/* SAX internalSubset callback: unless the config overrides the doctype, write
+ * the parsed <!DOCTYPE> to the next filter and choose the empty-element closer. */
+static void pinternalSubset(void* ctxt, const xmlChar *name,
+                            const xmlChar *externalID, const xmlChar *sysID)
+{
+    saxctxt *ctx = (saxctxt *) ctxt;
+    const char *pub = (const char *)externalID, *sys = (const char *)sysID;
+    if (!ctx || !name || ctx->cfg->doctype != DEFAULT_DOCTYPE) {
+        return;    /* no context/name, or doctype overridden in config */
+    }
+    ap_fputstrs(ctx->f->next, ctx->bb, "<!DOCTYPE ", (const char *)name, NULL);
+    if (pub) {
+        /* an XHTML FPI on an "html" root selects the XHTML empty-element closer */
+        ctx->etag = (!strcasecmp((const char *)name, "html") &&
+                     !strncasecmp(pub, "-//W3C//DTD XHTML ", 18))
+                    ? xhtml_etag : html_etag;
+        ap_fputstrs(ctx->f->next, ctx->bb, " PUBLIC \"", pub, "\"", NULL);
+        if (sys) {
+            ap_fputstrs(ctx->f->next, ctx->bb, " \"", sys, "\"", NULL);
+        }
+    }
+    else if (sys) {
+        /* a system identifier without a public one needs the SYSTEM keyword */
+        ap_fputstrs(ctx->f->next, ctx->bb, " SYSTEM \"", sys, "\"", NULL);
+    }
+    ap_fputs(ctx->f->next, ctx->bb, ">\n");
+}
 static void pcdata(void *ctxt, const xmlChar *uchars, int length)
 {
     const char *chars = (const char*) uchars;
@@ -632,7 +660,7 @@ static void pstartElement(void *ctxt, co
     }
     ctx->offset = 0;
     if (desc && desc->empty)
-        ap_fputs(ctx->f->next, ctx->bb, ctx->cfg->etag);
+        ap_fputs(ctx->f->next, ctx->bb, ctx->etag);
     else
         ap_fputc(ctx->f->next, ctx->bb, '>');
 
@@ -837,6 +865,7 @@ static saxctxt *check_filter_init (ap_fi
         fctx->bb = apr_brigade_create(f->r->pool,
                                       f->r->connection->bucket_alloc);
         fctx->cfg = cfg;
+        fctx->etag = cfg->etag;
         apr_table_unset(f->r->headers_out, "Content-Length");
 
         if (cfg->interp)
@@ -1129,7 +1158,10 @@ static const char *set_doctype(cmd_parms
                                const char *t, const char *l)
 {
     proxy_html_conf *cfg = (proxy_html_conf *)CFG;
-    if (!strcasecmp(t, "xhtml")) {
+    if (!strcasecmp(t, "auto")) {
+        cfg->doctype = DEFAULT_DOCTYPE; /* activates pinternalSubset */
+    }
+    else if (!strcasecmp(t, "xhtml")) {
         cfg->etag = xhtml_etag;
         if (l && !strcasecmp(l, "legacy"))
             cfg->doctype = fpi_xhtml_legacy;
@@ -1249,6 +1281,7 @@ static int mod_proxy_html(apr_pool_t *p,
     sax.characters = pcharacters;
     sax.comment = pcomment;
     sax.cdataBlock = pcdata;
+    sax.internalSubset = pinternalSubset;
     xml2enc_charset = APR_RETRIEVE_OPTIONAL_FN(xml2enc_charset);
     xml2enc_filter = APR_RETRIEVE_OPTIONAL_FN(xml2enc_filter);
     if (!xml2enc_charset) {