You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by yl...@apache.org on 2020/06/22 10:37:42 UTC

svn commit: r1879080 - in /httpd/httpd/trunk/modules/proxy: mod_proxy.c mod_proxy.h

Author: ylavic
Date: Mon Jun 22 10:37:41 2020
New Revision: 1879080

URL: http://svn.apache.org/viewvc?rev=1879080&view=rev
Log:
Allow for proxy servlet mapping at pre_translate_name stage.

Provide alias_match_servlet(), the servlet counterpart of alias_match(),
which maps the request URI-path to the ProxyPass alias ignoring path
parameters, while still forwarding them (above the alias).

This is needed to proxy servlet URIs for application handled by Tomcat,
which can then make use of the path/segments parameters.

Github: closes #128


Modified:
    httpd/httpd/trunk/modules/proxy/mod_proxy.c
    httpd/httpd/trunk/modules/proxy/mod_proxy.h

Modified: httpd/httpd/trunk/modules/proxy/mod_proxy.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/proxy/mod_proxy.c?rev=1879080&r1=1879079&r2=1879080&view=diff
==============================================================================
--- httpd/httpd/trunk/modules/proxy/mod_proxy.c (original)
+++ httpd/httpd/trunk/modules/proxy/mod_proxy.c Mon Jun 22 10:37:41 2020
@@ -570,6 +570,198 @@ static int alias_match(const char *uri,
     return urip - uri;
 }
 
+/*
+ * Inspired by mod_jk's jk_servlet_normalize().
+ */
+static int alias_match_servlet(apr_pool_t *p,
+                               const char *uri,
+                               const char *alias)
+{
+    char *map;
+    apr_array_header_t *stack;
+    int map_pos, uri_pos, alias_pos, first_pos;
+    int alias_depth = 0, depth;
+
+    /* Both uri and alias should start with '/' */
+    if (uri[0] != '/' || alias[0] != '/') {
+        return 0;
+    }
+
+    stack = apr_array_make(p, 5, sizeof(int));
+    map = apr_palloc(p, strlen(uri) + 1);
+    map[0] = '/';
+    map[1] = '\0';
+
+    map_pos = uri_pos = alias_pos = first_pos = 1;
+    while (uri[uri_pos] != '\0') {
+        /* Remove path parameters ;foo=bar/ from any path segment */
+        if (uri[uri_pos] == ';') {
+            do {
+                uri_pos++;
+            } while (uri[uri_pos] != '/' && uri[uri_pos] != '\0');
+            continue;
+        }
+
+        if (map[map_pos - 1] == '/') {
+            /* Collapse ///// sequences to / */
+            if (uri[uri_pos] == '/') {
+                do {
+                    uri_pos++;
+                } while (uri[uri_pos] == '/');
+                continue;
+            }
+
+            if (uri[uri_pos] == '.') {
+                /* Remove /./ segments */
+                if (uri[uri_pos + 1] == '/'
+                        || uri[uri_pos + 1] == ';'
+                        || uri[uri_pos + 1] == '\0') {
+                    uri_pos++;
+                    if (uri[uri_pos] == '/') {
+                        uri_pos++;
+                    }
+                    continue;
+                }
+
+                /* Remove /xx/../ segments */
+                if (uri[uri_pos + 1] == '.'
+                    && (uri[uri_pos + 2] == '/'
+                        || uri[uri_pos + 2] == ';'
+                        || uri[uri_pos + 2] == '\0')) {
+                    /* Wind map segment back the previous one */
+                    if (map_pos == 1) {
+                        /* Above root */
+                        return 0;
+                    }
+                    do {
+                        map_pos--;
+                    } while (map[map_pos - 1] != '/');
+                    map[map_pos] = '\0';
+
+                    /* Wind alias segment back, unless in deeper segment */
+                    if (alias_depth == stack->nelts) {
+                        if (alias[alias_pos] == '\0') {
+                            alias_pos--;
+                        }
+                        while (alias_pos > 0 && alias[alias_pos] == '/') {
+                            alias_pos--;
+                        }
+                        while (alias_pos > 0 && alias[alias_pos - 1] != '/') {
+                            alias_pos--;
+                        }
+                        AP_DEBUG_ASSERT(alias_pos > 0);
+                        alias_depth--;
+                    }
+                    apr_array_pop(stack);
+
+                    /* Move uri forward to the next segment */
+                    uri_pos += 2;
+                    if (uri[uri_pos] == '/') {
+                        uri_pos++;
+                    }
+                    first_pos = 0;
+                    continue;
+                }
+            }
+            if (first_pos) {
+                while (uri[first_pos] == '/') {
+                    first_pos++;
+                }
+            }
+
+            /* New segment */
+            APR_ARRAY_PUSH(stack, int) = first_pos ? first_pos : uri_pos;
+            if (alias[alias_pos] != '\0') {
+                if (alias[alias_pos - 1] != '/') {
+                    /* Remain in pair with uri segments */
+                    do {
+                        alias_pos++;
+                    } while (alias[alias_pos - 1] != '/' && alias[alias_pos]);
+                }
+                while (alias[alias_pos] == '/') {
+                    alias_pos++;
+                }
+                if (alias[alias_pos] != '\0') {
+                    alias_depth++;
+                }
+            }
+        }
+
+        if (alias[alias_pos] != '\0') {
+            int *match = &APR_ARRAY_IDX(stack, alias_depth - 1, int);
+            if (*match) {
+                if (alias[alias_pos] != uri[uri_pos]) {
+                    /* Current segment does not match */
+                    *match = 0;
+                }
+                else if (alias[alias_pos + 1] == '\0'
+                         && alias[alias_pos] != '/') {
+                    if (uri[uri_pos + 1] == ';') {
+                        /* We'll preserve the parameters of the last
+                         * segment if it does not end with '/', so mark
+                         * the match as negative for below handling.
+                         */
+                        *match = -(uri_pos + 1);
+                    }
+                    else if (uri[uri_pos + 1] != '/'
+                             && uri[uri_pos + 1] != '\0') {
+                        /* Last segment does not match all the way */
+                        *match = 0;
+                    }
+                }
+            }
+            /* Don't go past the segment if the uri isn't there yet */
+            if (alias[alias_pos] != '/' || uri[uri_pos] == '/') {
+                alias_pos++;
+            }
+        }
+
+        if (uri[uri_pos] == '/') {
+            first_pos = uri_pos + 1;
+        }
+        map[map_pos++] = uri[uri_pos++];
+        map[map_pos] = '\0';
+    }
+
+    /* Can't reach the end of uri before the end of the alias,
+     * for example if uri is "/" and alias is "/examples"
+     */
+    if (alias[alias_pos] != '\0') {
+        return 0;
+    }
+
+    /* Check whether each alias segment matched */
+    for (depth = 0; depth < alias_depth; ++depth) {
+        if (!APR_ARRAY_IDX(stack, depth, int)) {
+            return 0;
+        }
+    }
+
+    /* If alias_depth == stack->nelts we have a full match, i.e.
+     * uri == alias so we can return uri_pos as is (the end of uri)
+     */
+    if (alias_depth < stack->nelts) {
+        /* Return the segment following the alias */
+        uri_pos = APR_ARRAY_IDX(stack, alias_depth, int);
+        if (alias_depth) {
+            /* But if the last segment of the alias does not end with '/'
+             * and the corresponding segment of the uri has parameters,
+             * we want to forward those parameters (see above for the
+             * negative pos trick/mark).
+             */
+            int pos = APR_ARRAY_IDX(stack, alias_depth - 1, int);
+            if (pos < 0) {
+                uri_pos = -pos;
+            }
+        }
+    }
+    /* If the alias lacks a trailing slash, take it from the uri (if any) */
+    if (alias[alias_pos - 1] != '/' && uri[uri_pos - 1] == '/') {
+        uri_pos--;
+    }
+    return uri_pos;
+}
+
 /* Detect if an absoluteURI should be proxied or not.  Note that we
  * have to do this during this phase because later phases are
  * "short-circuiting"... i.e. translate_names will end when the first
@@ -740,7 +932,18 @@ PROXY_DECLARE(int) ap_proxy_trans_match(
         }
     }
     else {
-        len = alias_match(r->uri, fake);
+        /* Ignore servlet mapping if r->uri was decoded already, or we
+         * might consider for instance that an original %3B is a delimiter
+         * for path parameters (which is not).
+         */
+        if (!dconf->mapping_decoded
+                && (ent->flags & PROXYPASS_MAPPING_SERVLET)) {
+            nocanon = 0; /* ignored since servlet's normalization applies */
+            len = alias_match_servlet(r->pool, r->uri, fake);
+        }
+        else {
+            len = alias_match(r->uri, fake);
+        }
 
         if (len != 0) {
             if ((real[0] == '!') && (real[1] == '\0')) {
@@ -795,7 +998,7 @@ PROXY_DECLARE(int) ap_proxy_trans_match(
     return DONE;
 }
 
-static int proxy_trans(request_rec *r)
+static int proxy_trans(request_rec *r, int pre_trans)
 {
     int i;
     struct proxy_alias *ent;
@@ -809,6 +1012,20 @@ static int proxy_trans(request_rec *r)
         return OK;
     }
 
+    /* In early pre_trans hook, r->uri was not manipulated yet so we are
+     * compliant with RFC1945 at this point. Otherwise, it probably isn't
+     * an issue because this is a hybrid proxy/origin server.
+     */
+
+    dconf = ap_get_module_config(r->per_dir_config, &proxy_module);
+
+    /* Do the work from the hook corresponding to the ProxyMappingDecoded
+     * configuration (on/default: translate hook, off: pre_translate hook).
+     */
+    if (pre_trans ^ !dconf->mapping_decoded) {
+        return DECLINED;
+    }
+
     if ((r->unparsed_uri[0] == '*' && r->unparsed_uri[1] == '\0')
         || !r->uri || r->uri[0] != '/') {
         return DECLINED;
@@ -818,13 +1035,6 @@ static int proxy_trans(request_rec *r)
         return DECLINED;
     }
 
-    /* XXX: since r->uri has been manipulated already we're not really
-     * compliant with RFC1945 at this point.  But this probably isn't
-     * an issue because this is a hybrid proxy/origin server.
-     */
-
-    dconf = ap_get_module_config(r->per_dir_config, &proxy_module);
-
     /* short way - this location is reverse proxied? */
     if (dconf->alias) {
         int rv = ap_proxy_trans_match(r, dconf->alias, dconf);
@@ -846,9 +1056,19 @@ static int proxy_trans(request_rec *r)
             }
         }
     }
+
     return DECLINED;
 }
 
+static int proxy_pre_translate_name(request_rec *r)
+{
+    return proxy_trans(r, 1);
+}
+static int proxy_translate_name(request_rec *r)
+{
+    return proxy_trans(r, 0);
+}
+
 static int proxy_walk(request_rec *r)
 {
     proxy_server_conf *sconf = ap_get_module_config(r->server->module_config,
@@ -1593,6 +1813,7 @@ static void *create_proxy_dir_config(apr
     new->add_forwarded_headers_set = 0;
     new->forward_100_continue = 1;
     new->forward_100_continue_set = 0;
+    new->mapping_decoded = -1; /* unset (on by default) */
 
     return (void *) new;
 }
@@ -1651,6 +1872,9 @@ static void *merge_proxy_dir_config(apr_
     new->forward_100_continue_set = add->forward_100_continue_set
                                     || base->forward_100_continue_set;
     
+    new->mapping_decoded = (add->mapping_decoded == -1) ? base->mapping_decoded
+                                                        : add->mapping_decoded;
+
     return new;
 }
 
@@ -1806,6 +2030,9 @@ static const char *
         else if (!strcasecmp(word,"noquery")) {
             flags |= PROXYPASS_NOQUERY;
         }
+        else if (!strcasecmp(word,"mapping=servlet")) {
+            flags |= PROXYPASS_MAPPING_SERVLET;
+        }
         else {
             char *val = strchr(word, '=');
             if (!val) {
@@ -2799,6 +3026,9 @@ static const command_rec proxy_cmds[] =
     AP_INIT_FLAG("Proxy100Continue", forward_100_continue, NULL, RSRC_CONF|ACCESS_CONF,
      "on if 100-Continue should be forwarded to the origin server, off if the "
      "proxy should handle it by itself"),
+    AP_INIT_FLAG("ProxyMappingDecoded", ap_set_flag_slot_char,
+     (void*)APR_OFFSETOF(proxy_dir_conf, mapping_decoded), RSRC_CONF|ACCESS_CONF,
+     "Whether to percent-decode before URI-path mapping"),
     {NULL}
 };
 
@@ -3147,7 +3377,10 @@ static void register_hooks(apr_pool_t *p
     /* handler */
     ap_hook_handler(proxy_handler, NULL, NULL, APR_HOOK_FIRST);
     /* filename-to-URI translation */
-    ap_hook_translate_name(proxy_trans, aszSucc, NULL, APR_HOOK_FIRST);
+    ap_hook_pre_translate_name(proxy_pre_translate_name, NULL, NULL,
+                               APR_HOOK_MIDDLE);
+    ap_hook_translate_name(proxy_translate_name, aszSucc, NULL,
+                           APR_HOOK_FIRST);
     /* walk <Proxy > entries and suppress default TRACE behavior */
     ap_hook_map_to_storage(proxy_map_location, NULL,NULL, APR_HOOK_FIRST);
     /* fixups */

Modified: httpd/httpd/trunk/modules/proxy/mod_proxy.h
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/proxy/mod_proxy.h?rev=1879080&r1=1879079&r2=1879080&view=diff
==============================================================================
--- httpd/httpd/trunk/modules/proxy/mod_proxy.h (original)
+++ httpd/httpd/trunk/modules/proxy/mod_proxy.h Mon Jun 22 10:37:41 2020
@@ -123,6 +123,7 @@ struct proxy_remote {
 #define PROXYPASS_NOCANON 0x01
 #define PROXYPASS_INTERPOLATE 0x02
 #define PROXYPASS_NOQUERY 0x04
+#define PROXYPASS_MAPPING_SERVLET 0x08
 struct proxy_alias {
     const char  *real;
     const char  *fake;
@@ -243,6 +244,9 @@ typedef struct {
     unsigned int forward_100_continue_set:1;
 
     apr_array_header_t *error_override_codes;
+
+    /** Whether to map percent-decoded URI-path */
+    char mapping_decoded;
 } proxy_dir_conf;
 
 /* if we interpolate env vars per-request, we'll need a per-request



Re: svn commit: r1879080 - in /httpd/httpd/trunk/modules/proxy: mod_proxy.c mod_proxy.h

Posted by Yann Ylavic <yl...@gmail.com>.
On Fri, Jun 26, 2020 at 12:30 PM Ruediger Pluem <rp...@apache.org> wrote:
>
> On 6/26/20 12:01 PM, Yann Ylavic wrote:
> > On Fri, Jun 26, 2020 at 8:57 AM Ruediger Pluem <rp...@apache.org> wrote:
> >>
> >> On 6/25/20 2:16 PM, Yann Ylavic wrote:
> >>> On Thu, Jun 25, 2020 at 1:27 PM Ruediger Pluem <rp...@apache.org> wrote:
> >>>>
> >>>> On 6/25/20 12:13 PM, Yann Ylavic wrote:
> >>>>> Index: modules/proxy/mod_proxy.c
> >>>>> ===================================================================
> >>>>> --- modules/proxy/mod_proxy.c (revision 1879145)
> >>>>> +++ modules/proxy/mod_proxy.c (working copy)
> >>>>
> >>>>> @@ -987,10 +991,10 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
> >>>>>                        "URI path '%s' matches proxy handler '%s'", r->uri,
> >>>>>                        found);
> >>>>>
> >>>>> -        return OK;
> >>>>> +        return servlet ? DONE : OK;
> >>>>
> >>>> Why setting it to DONE in the servlet case? Wouldn't that cause ap_process_request_internal to be left early?
> >>>
> >>> No, ap_process_request_internal() would just skip r->uri decoding (we
> >>> are in pre_trans hook here, since mapping=servlet only happens there).
> >>> Anyway, it's an orthogonal change sorry, maybe we still want uri
> >>> decoding for directory/location walk in the servlet case, but since
> >>> this patch actually modifies r->uri (while other proxy mappings do
> >>> not), I thought it could be the final transformation (that's DONE
> >>> returned from pre_trans).
> >>
> >> Sorry, but I am still struggling: This is from server/request.c starting line 233
> >>
> >> 233        access_status = ap_run_pre_translate_name(r);
> >> 234        if (access_status != OK && access_status != DECLINED) {
> >> 235            return access_status;
> >> 236        }
> >
> > Ah yes sorry, I missed you were referring to this initial commit.
> > It was later changed (http://svn.apache.org/r1879137) like this:
>
> Sorry my bad :-(. I missed to svn up my working copy when trying to understand the attached patch,
> hence the questions. With r1879137 I am fine. Sorry for the noise.

Thanks, r1879235.


Regards;
Yann.

Re: svn commit: r1879080 - in /httpd/httpd/trunk/modules/proxy: mod_proxy.c mod_proxy.h

Posted by Ruediger Pluem <rp...@apache.org>.

On 6/26/20 12:01 PM, Yann Ylavic wrote:
> On Fri, Jun 26, 2020 at 8:57 AM Ruediger Pluem <rp...@apache.org> wrote:
>>
>> On 6/25/20 2:16 PM, Yann Ylavic wrote:
>>> On Thu, Jun 25, 2020 at 1:27 PM Ruediger Pluem <rp...@apache.org> wrote:
>>>>
>>>> On 6/25/20 12:13 PM, Yann Ylavic wrote:
>>>>> Index: modules/proxy/mod_proxy.c
>>>>> ===================================================================
>>>>> --- modules/proxy/mod_proxy.c (revision 1879145)
>>>>> +++ modules/proxy/mod_proxy.c (working copy)
>>>>
>>>>> @@ -987,10 +991,10 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
>>>>>                        "URI path '%s' matches proxy handler '%s'", r->uri,
>>>>>                        found);
>>>>>
>>>>> -        return OK;
>>>>> +        return servlet ? DONE : OK;
>>>>
>>>> Why setting it to DONE in the servlet case? Wouldn't that cause ap_process_request_internal to be left early?
>>>
>>> No, ap_process_request_internal() would just skip r->uri decoding (we
>>> are in pre_trans hook here, since mapping=servlet only happens there).
>>> Anyway, it's an orthogonal change sorry, maybe we still want uri
>>> decoding for directory/location walk in the servlet case, but since
>>> this patch actually modifies r->uri (while other proxy mappings do
>>> not), I thought it could be the final transformation (that's DONE
>>> returned from pre_trans).
>>
>> Sorry, but I am still struggling: This is from server/request.c starting line 233
>>
>> 233        access_status = ap_run_pre_translate_name(r);
>> 234        if (access_status != OK && access_status != DECLINED) {
>> 235            return access_status;
>> 236        }
> 
> Ah yes sorry, I missed you were referring to this initial commit.
> It was later changed (http://svn.apache.org/r1879137) like this:

Sorry my bad :-(. I missed to svn up my working copy when trying to understand the attached patch,
hence the questions. With r1879137 I am fine. Sorry for the noise.

Regards

Rüdiger



Re: svn commit: r1879080 - in /httpd/httpd/trunk/modules/proxy: mod_proxy.c mod_proxy.h

Posted by Yann Ylavic <yl...@gmail.com>.
On Fri, Jun 26, 2020 at 8:57 AM Ruediger Pluem <rp...@apache.org> wrote:
>
> On 6/25/20 2:16 PM, Yann Ylavic wrote:
> > On Thu, Jun 25, 2020 at 1:27 PM Ruediger Pluem <rp...@apache.org> wrote:
> >>
> >> On 6/25/20 12:13 PM, Yann Ylavic wrote:
> >>> Index: modules/proxy/mod_proxy.c
> >>> ===================================================================
> >>> --- modules/proxy/mod_proxy.c (revision 1879145)
> >>> +++ modules/proxy/mod_proxy.c (working copy)
> >>
> >>> @@ -987,10 +991,10 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
> >>>                        "URI path '%s' matches proxy handler '%s'", r->uri,
> >>>                        found);
> >>>
> >>> -        return OK;
> >>> +        return servlet ? DONE : OK;
> >>
> >> Why setting it to DONE in the servlet case? Wouldn't that cause ap_process_request_internal to be left early?
> >
> > No, ap_process_request_internal() would just skip r->uri decoding (we
> > are in pre_trans hook here, since mapping=servlet only happens there).
> > Anyway, it's an orthogonal change sorry, maybe we still want uri
> > decoding for directory/location walk in the servlet case, but since
> > this patch actually modifies r->uri (while other proxy mappings do
> > not), I thought it could be the final transformation (that's DONE
> > returned from pre_trans).
>
> Sorry, but I am still struggling: This is from server/request.c starting line 233
>
> 233        access_status = ap_run_pre_translate_name(r);
> 234        if (access_status != OK && access_status != DECLINED) {
> 235            return access_status;
> 236        }

Ah yes sorry, I missed you were referring to this initial commit.
It was later changed (http://svn.apache.org/r1879137) like this:

--- httpd/httpd/trunk/include/http_request.h (original)
+++ httpd/httpd/trunk/include/http_request.h Wed Jun 24 07:47:58 2020
@@ -366,7 +366,10 @@ AP_DECLARE_HOOK(int,create_request,(requ
  * This hook allow modules an opportunity to translate the URI into an
  * actual filename, before URL decoding happens.
  * @param r The current request
- * @return OK, DECLINED, or HTTP_...
+ * @return DECLINED to let other modules handle the pre-translation,
+ *         OK if it was handled and no other module should process it,
+ *         DONE if no further transformation should happen on the URI,
+ *         HTTP_... in case of error.
  * @ingroup hooks
  */
AP_DECLARE_HOOK(int,pre_translate_name,(request_rec *r))

--- httpd/httpd/trunk/server/request.c (original)
+++ httpd/httpd/trunk/server/request.c Wed Jun 24 07:47:58 2020
@@ -227,11 +227,11 @@ AP_DECLARE(int) ap_process_request_inter
             return access_status;
         }

-        /* Let pre_translate_name hooks work with non-decoded URIs,
-         * and eventually apply their own transformations (return OK).
+        /* Let pre_translate_name hooks work with non-decoded URIs, and
+         * eventually prevent further URI transformations (return DONE).
          */
         access_status = ap_run_pre_translate_name(r);
-        if (access_status != OK && access_status != DECLINED) {
+        if (ap_is_HTTP_ERROR(access_status)) {
             return access_status;
         }

@@ -240,7 +240,7 @@ AP_DECLARE(int) ap_process_request_inter
     }

     /* Ignore URL unescaping for translated URIs already */
-    if (access_status == DECLINED && r->parsed_uri.path) {
+    if (access_status != DONE && r->parsed_uri.path) {
         core_dir_config *d = ap_get_core_module_config(r->per_dir_config);
...

>
> In case of a successful match of a proxy mapping with servlet mapping enabled access_status would be DONE and we leave
> ap_process_request_internal via the the return in line 235

So we should be good with latest version of ap_process_request_internal().


Regards;
Yann.

Re: svn commit: r1879080 - in /httpd/httpd/trunk/modules/proxy: mod_proxy.c mod_proxy.h

Posted by Ruediger Pluem <rp...@apache.org>.

On 6/25/20 2:16 PM, Yann Ylavic wrote:
> On Thu, Jun 25, 2020 at 1:27 PM Ruediger Pluem <rp...@apache.org> wrote:
>>
>> On 6/25/20 12:13 PM, Yann Ylavic wrote:
>>> Index: modules/proxy/mod_proxy.c
>>> ===================================================================
>>> --- modules/proxy/mod_proxy.c (revision 1879145)
>>> +++ modules/proxy/mod_proxy.c (working copy)
>>
>>> @@ -987,10 +991,10 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
>>>                        "URI path '%s' matches proxy handler '%s'", r->uri,
>>>                        found);
>>>
>>> -        return OK;
>>> +        return servlet ? DONE : OK;
>>
>> Why setting it to DONE in the servlet case? Wouldn't that cause ap_process_request_internal to be left early?
> 
> No, ap_process_request_internal() would just skip r->uri decoding (we
> are in pre_trans hook here, since mapping=servlet only happens there).
> Anyway, it's an orthogonal change sorry, maybe we still want uri
> decoding for directory/location walk in the servlet case, but since
> this patch actually modifies r->uri (while other proxy mappings do
> not), I thought it could be the final transformation (that's DONE
> returned from pre_trans).

Sorry, but I am still struggling: This is from server/request.c starting line 233

233        access_status = ap_run_pre_translate_name(r);
234        if (access_status != OK && access_status != DECLINED) {
235            return access_status;
236        }

In case of a successful match of a proxy mapping with servlet mapping enabled access_status would be DONE and we leave
ap_process_request_internal via the the return in line 235


Regards

Rüdiger

Re: svn commit: r1879080 - in /httpd/httpd/trunk/modules/proxy: mod_proxy.c mod_proxy.h

Posted by Yann Ylavic <yl...@gmail.com>.
On Thu, Jun 25, 2020 at 1:27 PM Ruediger Pluem <rp...@apache.org> wrote:
>
> On 6/25/20 12:13 PM, Yann Ylavic wrote:
> > Index: modules/proxy/mod_proxy.c
> > ===================================================================
> > --- modules/proxy/mod_proxy.c (revision 1879145)
> > +++ modules/proxy/mod_proxy.c (working copy)
>
> > @@ -987,10 +991,10 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
> >                        "URI path '%s' matches proxy handler '%s'", r->uri,
> >                        found);
> >
> > -        return OK;
> > +        return servlet ? DONE : OK;
>
> Why setting it to DONE in the servlet case? Wouldn't that cause ap_process_request_internal to be left early?

No, ap_process_request_internal() would just skip r->uri decoding (we
are in pre_trans hook here, since mapping=servlet only happens there).
Anyway, it's an orthogonal change sorry, maybe we still want uri
decoding for directory/location walk in the servlet case, but since
this patch actually modifies r->uri (while other proxy mappings do
not), I thought it could be the final transformation (that's DONE
returned from pre_trans).

The only case where it matters is for encoded control/reserved chars
(unreserved are always decoded during initial normalization). So for
the authz/authn case you mentioned it means that the admin would have
to use/match the encoded form of the path (in <Location>) when the
concerned path contains reserved chars.
Something like <Location "/admin%3Bfoo"> if the final app on Tomcat is
really called "admin;foo" (which I don't recommend!), but it kind of
make sense because with servlet normalization <Location /admin;foo>
would never match..

That's all theoretical and unlikely case, but that's where we are ;)

By the way, I think a more correct patch would be this one (attached),
whether it returns DONE or OK (I kept DONE for now, until we get
further on this lovely encoding discussion :)
The change is that r->uri is rewritten in-place so that
r->parsed_uri.path gets updated too.

Regards;
Yann.

Re: svn commit: r1879080 - in /httpd/httpd/trunk/modules/proxy: mod_proxy.c mod_proxy.h

Posted by Ruediger Pluem <rp...@apache.org>.

On 6/25/20 12:13 PM, Yann Ylavic wrote:
> Index: modules/proxy/mod_proxy.c
> ===================================================================
> --- modules/proxy/mod_proxy.c	(revision 1879145)
> +++ modules/proxy/mod_proxy.c	(working copy)

> @@ -987,10 +991,10 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
>                        "URI path '%s' matches proxy handler '%s'", r->uri,
>                        found);
>  
> -        return OK;
> +        return servlet ? DONE : OK;

Why setting it to DONE in the servlet case? Wouldn't that cause ap_process_request_internal to be left early?

>      }
>  
> -    return DONE;
> +    return HTTP_CONTINUE;
>  }
>  
>  static int proxy_trans(request_rec *r, int pre_trans)

Regards

Rüdiger



Re: svn commit: r1879080 - in /httpd/httpd/trunk/modules/proxy: mod_proxy.c mod_proxy.h

Posted by Yann Ylavic <yl...@gmail.com>.
On Thu, Jun 25, 2020 at 9:24 AM Ruediger Pluem <rp...@apache.org> wrote:
>
> The other thing that still worries me and that I think is not covered is the following scenario:
>
> <Location /admin>
>
> ..... some authn and authz ....
>
> </Location>
>
> ProxyPass / http://
>
> Here the Apache is used to do authn and authz for the backend.
>
> /app/..;pp=somevalue/admin/nastything
>
> IMHO could circumvent this no matter what the mapping option is set to in ProxyPass

Yes, very true.

But if we want mapping=servlet to take "ownership" of r->uri, we can
do something like the attached patch.
The result is that if mod_proxy matches a servlet URI-path, r->uri
gets servlet normalized (without the path parameters) for the rest of
the request handling.
So your above example gets rewritten to "/admin/nastything" and
"<Location /admin>" now applies.

Would that be better?

Regards;
Yann

Re: svn commit: r1879080 - in /httpd/httpd/trunk/modules/proxy: mod_proxy.c mod_proxy.h

Posted by Ruediger Pluem <rp...@apache.org>.

On 6/24/20 1:09 PM, Yann Ylavic wrote:
> On Tue, Jun 23, 2020 at 9:59 AM Ruediger Pluem <rp...@apache.org> wrote:
>>
>> On 6/22/20 12:37 PM, ylavic@apache.org wrote:
>>>
>>> +/*
>>> + * Inspired by mod_jk's jk_servlet_normalize().
>>> + */
>>> +static int alias_match_servlet(apr_pool_t *p,
>>> +                               const char *uri,
>>> +                               const char *alias)
>>> +{
>>
>> The code for alias_match_servlet looks awful complex.
> 
> Yeah I know..
> 
>> I think it is this way not because it is done wrong in some way, but because
>> the problem to solve is complex. This brought me to the point if it is worth having this complexity.
> 
> I suppose that it helps to close a mod_jk vs mod_proxy_ajp gap, but I
> don't use either actually.
> It seems that Jean-Frédéric is interested in the feature, probably
> some Tomcat users too who want the proxy to isolate/restrict
> applications (paths) on the proxy.
> 
>> My understanding is that the original issue we faced was that traversal problem as HTTP and Servlet spec handle path parameters
>> differently in the '..' case. So we need to do something about
>>
>> /app/..;pp=somevalue/admin
>>
>> URI patterns. The question for me is: Are URI's that contain these patterns sensible for a servlet based application or can we
>> always assume bad intentions by the originator?
> 
> Dunno, we do not assume bad intentions for "../" patterns in
> non-servlet paths though, and normalize them.

In non-servlet paths we just normalize them in case of ../, but we just keep them in case of ..;pp=somevalue/.
I think now in the servlet case we silently ignore the path parameter and just normalize as if it was just ../

> I know about the OpenAPI specification, and you probably don't want to
> see how applications put segment parameters all over the place ;)
> All I can tell is that in the API/REST world, it's quite used (never
> saw the "..;" thing, though).
> 
>> If we always assume bad intentions we could just reject such URI's (probably only
>> if they match a ProxyPass with a flag set, for the Rewrite case we could just document a Rewrite rule that kills them).
> 
> Could be, but the nominal use case is probably the below (IMHO).
> 
>> The other case I see covered with alias_match_servlet is the case where we have path parameters on the prefix part that ProxyPass
>> should match. Without this commit
>>
>> /app;pp=something/somethingelse
>>
>> wouldn't match
>>
>> ProxyPass /app schema://backend/app
> 
> This, I suppose applications that handle path/segment parameters want
> to see the ones for /app.
> That's a real use case IMO.
> 
>>
>> But given the complexity of the code I am not sure if these cases are worth fixing or if people should just use ProyPassMatch / a
>> RewriteRule that covers such cases if the application needs that. Of course that depends on how often such cases appear. If they
>> are frequent than a direct ProxyPass support seems sensible. If they are rare probably not.
> 
> Yeah, it depends, Tomcat users are probably more able to answer this than me.
> I _think_ I got alias_match_servlet() right, sure it's complex, but we
> have it now..

Yes and this is fine. Thanks for doing. I just worry a little bit about if such complex code isn't prone to vulnerabilities.

> It can possibly be simplified a bit (store more than a single int per
> segment in the stack, so that we don't need to maintain a normalized
> path separately for rewind), but it's complex anyway I concur.
> 
> Possibly I'll need a mapping=openapi some day, at least I have a base
> implementation for that now (not sure it's interesting people either
> and that I would propose it upstream..).
> 
> Anyway, if there is a need for app isolation on the proxy with
> accurate path parameters forwarding, it's not such a bad way to do it.
> If there is no need, and we simply want to block "..;", I agree that a
> RewriteRule is more than enough.


The other thing that still worries me and that I think is not covered is the following scenario:

<Location /admin>

..... some authn and authz ....

</Location>

ProxyPass / http://

Here the Apache is used to do authn and authz for the backend.

/app/..;pp=somevalue/admin/nastything

IMHO could circumvent this no matter what the mapping option is set to in ProxyPass

Regards

Rüdiger

Re: svn commit: r1879080 - in /httpd/httpd/trunk/modules/proxy: mod_proxy.c mod_proxy.h

Posted by Yann Ylavic <yl...@gmail.com>.
On Tue, Jun 23, 2020 at 9:59 AM Ruediger Pluem <rp...@apache.org> wrote:
>
> On 6/22/20 12:37 PM, ylavic@apache.org wrote:
> >
> > +/*
> > + * Inspired by mod_jk's jk_servlet_normalize().
> > + */
> > +static int alias_match_servlet(apr_pool_t *p,
> > +                               const char *uri,
> > +                               const char *alias)
> > +{
>
> The code for alias_match_servlet looks awful complex.

Yeah I know..

> I think it is this way not because it is done wrong in some way, but because
> the problem to solve is complex. This brought me to the point if it is worth having this complexity.

I suppose that it helps to close a mod_jk vs mod_proxy_ajp gap, but I
don't use either actually.
It seems that Jean-Frédéric is interested in the feature, probably
some Tomcat users too who want the proxy to isolate/restrict
applications (paths) on the proxy.

> My understanding is that the original issue we faced was that traversal problem as HTTP and Servlet spec handle path parameters
> differently in the '..' case. So we need to do something about
>
> /app/..;pp=somevalue/admin
>
> URI patterns. The question for me is: Are URI's that contain these patterns sensible for a servlet based application or can we
> always assume bad intentions by the originator?

Dunno, we do not assume bad intentions for "../" patterns in
non-servlet paths though, and normalize them.
I know about the OpenAPI specification, and you probably don't want to
see how applications put segment parameters all over the place ;)
All I can tell is that in the API/REST world, it's quite used (never
saw the "..;" thing, though).

> If we always assume bad intentions we could just reject such URI's (probably only
> if they match a ProxyPass with a flag set, for the Rewrite case we could just document a Rewrite rule that kills them).

Could be, but the nominal use case is probably the below (IMHO).

> The other case I see covered with alias_match_servlet is the case where we have path parameters on the prefix part that ProxyPass
> should match. Without this commit
>
> /app;pp=something/somethingelse
>
> wouldn't match
>
> ProxyPass /app schema://backend/app

This, I suppose applications that handle path/segment parameters want
to see the ones for /app.
That's a real use case IMO.

>
> But given the complexity of the code I am not sure if these cases are worth fixing or if people should just use ProyPassMatch / a
> RewriteRule that covers such cases if the application needs that. Of course that depends on how often such cases appear. If they
> are frequent than a direct ProxyPass support seems sensible. If they are rare probably not.

Yeah, it depends, Tomcat users are probably more able to answer this than me.
I _think_ I got alias_match_servlet() right, sure it's complex, but we
have it now..
It can possibly be simplified a bit (store more than a single int per
segment in the stack, so that we don't need to maintain a normalized
path separately for rewind), but it's complex anyway I concur.

Possibly I'll need a mapping=openapi some day, at least I have a base
implementation for that now (not sure it's interesting people either
and that I would propose it upstream..).

Anyway, if there is a need for app isolation on the proxy with
accurate path parameters forwarding, it's not such a bad way to do it.
If there is no need, and we simply want to block "..;", I agree that a
RewriteRule is more than enough.


Regards,
Yann.

Re: svn commit: r1879080 - in /httpd/httpd/trunk/modules/proxy: mod_proxy.c mod_proxy.h

Posted by Ruediger Pluem <rp...@apache.org>.

On 6/22/20 12:37 PM, ylavic@apache.org wrote:
> Author: ylavic
> Date: Mon Jun 22 10:37:41 2020
> New Revision: 1879080
> 
> URL: http://svn.apache.org/viewvc?rev=1879080&view=rev
> Log:
> Allow for proxy servlet mapping at pre_translate_name stage.
> 
> Provide alias_match_servlet(), the servlet counterpart of alias_match(),
> which maps the request URI-path to the ProxyPass alias ignoring path
> parameters, while still forwarding them (above the alias).
> 
> This is needed to proxy servlet URIs for application handled by Tomcat,
> which can then make use of the path/segments parameters.
> 
> Github: closes #128
> 
> 
> Modified:
>     httpd/httpd/trunk/modules/proxy/mod_proxy.c
>     httpd/httpd/trunk/modules/proxy/mod_proxy.h
> 
> Modified: httpd/httpd/trunk/modules/proxy/mod_proxy.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/proxy/mod_proxy.c?rev=1879080&r1=1879079&r2=1879080&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/modules/proxy/mod_proxy.c (original)
> +++ httpd/httpd/trunk/modules/proxy/mod_proxy.c Mon Jun 22 10:37:41 2020
> @@ -570,6 +570,198 @@ static int alias_match(const char *uri,
>      return urip - uri;
>  }
>  
> +/*
> + * Inspired by mod_jk's jk_servlet_normalize().
> + */
> +static int alias_match_servlet(apr_pool_t *p,
> +                               const char *uri,
> +                               const char *alias)
> +{

The code for alias_match_servlet looks awful complex. I think it is this way not because it is done wrong in some way, but because
the problem to solve is complex. This brought me to the point if it is worth having this complexity.
My understanding is that the original issue we faced was that traversal problem as HTTP and Servlet spec handle path parameters
differently in the '..' case. So we need to do something about

/app/..;pp=somevalue/admin

URI patterns. The question for me is: Are URI's that contain these patterns sensible for a servlet based application or can we
always assume bad intentions by the originator? If we always assume bad intentions we could just reject such URI's (probably only
if they match a ProxyPass with a flag set, for the Rewrite case we could just document a Rewrite rule that kills them).
The other case I see covered with alias_match_servlet is the case where we have path parameters on the prefix part that ProxyPass
should match. Without this commit

/app;pp=something/somethingelse

wouldn't match

ProxyPass /app schema://backend/app

But given the complexity of the code I am not sure if these cases are worth fixing or if people should just use ProyPassMatch / a
RewriteRule that covers such cases if the application needs that. Of course that depends on how often such cases appear. If they
are frequent than a direct ProxyPass support seems sensible. If they are rare probably not.

Regards

Rüdiger