You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by Justin Erenkrantz <je...@ebuilt.com> on 2001/09/06 01:44:53 UTC

[PATCH] Take 3 of mod_include patch...

Okay, I've cleaned this up and I think it is ready for commit.
However, I'd really like some eyes on this.  =-)  

In Ian and Brian's testing, this does seem to make mod_include
faster.  I can't guarantee that there aren't any bugs here,
but I've tested it with what I have and looked at the code as
best as I can.  I think this gets us moving in the right
direction.

Changes since last post:
- don't call strlen on the static string (thanks Cliff!)
- move the compilation of the starting_sequence to post_config
- comment (well, tried to...)
- fixed some of the parameters to fit our type conventions.  

Comments?  Can someone who has knowledge of the bndm algorithm
verify this?  I'll try glancing at the paper tonight or tomorrow.
-- justin

Index: modules/filters/mod_include.c
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/filters/mod_include.c,v
retrieving revision 1.143
diff -u -r1.143 mod_include.c
--- modules/filters/mod_include.c	2001/09/04 02:13:58	1.143
+++ modules/filters/mod_include.c	2001/09/05 23:39:18
@@ -206,6 +206,113 @@
 
 /* --------------------------- Parser functions --------------------------- */
 
+/* This is an implementation of the BNDM search algorithm.
+ *
+ * Fast and Flexible String Matching by Combining Bit-parallelism and 
+ * Suffix Automata (2001) 
+ * Gonzalo Navarro, Mathieu Raffinot
+ *
+ * http://www-igm.univ-mlv.fr/~raffinot/ftp/jea2001.ps.gz
+ *
+ * Initial code submitted by Sascha Schumann.
+ */
+   
+typedef struct {
+    unsigned int T[256];
+    unsigned int x;
+} bndm_t;
+
+/* This is the pattern matcher that holds the STARTING_SEQUENCE bndm_t
+ * structure.
+ */
+static bndm_t start_seq_pat;
+
+/* Precompile the bndm_t data structure. */
+static void bndm_compile(bndm_t *t, const char *n, apr_size_t nl)
+{
+    unsigned int x;
+    const char *ne = n + nl;
+
+    memset(t->T, 0, sizeof(unsigned int) * 256);
+    
+    for (x = 1; n < ne; x <<= 1)
+        t->T[(unsigned char) *n++] |= x;
+
+    t->x = x - 1;
+}
+
+/* Implements the BNDM search algorithm (as described above).
+ *
+ * n  - the pattern to search for
+ * nl - length of the pattern to search for
+ * h  - the string to look in
+ * hl - length of the string to look for
+ * t  - precompiled bndm structure against the pattern 
+ *
+ * Returns the count of character that is the first match or hl if no
+ * match is found.
+ */
+static int bndm(const char *n, apr_size_t nl, const char *h, apr_size_t hl, 
+                bndm_t *t)
+{
+    apr_size_t skip;
+    const char *he, *p, *pi;
+    unsigned int *T, x, d;
+
+    he = h + hl;
+
+    T = t->T;
+    x = t->x << 1;
+
+    pi = h - 1; /* pi: p initial */
+    p = pi + nl; /* compare window right to left. point to the first char */
+
+    do {
+        skip = nl;
+        d = x;
+        do {
+            d = (d >> 1) & T[(unsigned char) *p--];
+            if ((d & 1)) {
+                if (p != pi)
+                    skip = p - pi;
+                else
+                    return p - h + 1;
+            }
+        } while (d > 1);
+        p = (pi += skip) + nl; 
+    } while (p < he);
+
+    return hl;
+}
+
+/* We've now found a start sequence tag... */
+static apr_bucket* found_start_sequence(apr_bucket *dptr,
+                                          include_ctx_t *ctx, 
+                                          int tagStart)
+{
+    /* We want to split the bucket at the '<'. */
+    ctx->state = PARSE_DIRECTIVE;
+    ctx->tag_length = 0;
+    ctx->parse_pos = 0;
+    ctx->tag_start_bucket = dptr;
+    ctx->tag_start_index = tagStart;
+    if (ctx->head_start_index > 0) {
+        apr_bucket *tmp_bkt;
+
+        /* Split the bucket with the start of the tag in it */
+        apr_bucket_split(ctx->head_start_bucket, ctx->head_start_index);
+        tmp_bkt = APR_BUCKET_NEXT(ctx->head_start_bucket);
+        /* If it was a one bucket match */
+        if (dptr == ctx->head_start_bucket) {
+            ctx->tag_start_bucket = tmp_bkt;
+            ctx->tag_start_index = tagStart - ctx->head_start_index;
+        }
+        ctx->head_start_bucket = tmp_bkt;
+        ctx->head_start_index = 0;
+    }
+    return ctx->head_start_bucket;
+}
+
 /* This function returns either a pointer to the split bucket containing the
  * first byte of the BEGINNING_SEQUENCE (after finding a complete match) or it
  * returns NULL if no match found.
@@ -217,8 +324,8 @@
     const char *c;
     const char *buf;
     const char *str = STARTING_SEQUENCE;
-    apr_bucket *tmp_bkt;
-    apr_size_t  start_index;
+    int slen = sizeof(STARTING_SEQUENCE) - 1;
+    int pos;
 
     *do_cleanup = 0;
 
@@ -269,8 +376,53 @@
         if (len == 0) { /* end of pipe? */
             break;
         }
+
+        /* Set our buffer to use. */
         c = buf;
-        while (c < buf + len) {
+
+        /* The last bucket had a left over partial match that we need to
+         * complete. 
+         */
+        if (ctx->state == PARSE_HEAD)
+        {
+            apr_size_t tmpLen;
+            tmpLen = (len > (slen - 1)) ? len : (slen - 1);
+
+            while (c < buf + tmpLen && *c == str[ctx->parse_pos])
+            {
+                c++; 
+                ctx->parse_pos++;
+            }
+
+            if (str[ctx->parse_pos] == '\0')
+            {
+                ctx->bytes_parsed += c - buf;
+                return found_start_sequence(dptr, ctx, c - buf);
+            }
+
+            /* False alarm... */
+            ctx->state = PRE_HEAD;
+        }
+
+        if (len)
+        {
+            pos = bndm(str, slen, c, len, &start_seq_pat);
+            if (pos != len)
+            {
+                ctx->head_start_bucket = dptr;
+                ctx->head_start_index = pos;
+                ctx->bytes_parsed += pos + slen;
+                return found_start_sequence(dptr, ctx, pos + slen);
+            }
+        }
+        
+        /* Consider the case where we have <!-- at the end of the bucket. */
+        ctx->bytes_parsed += len - slen;
+        ctx->parse_pos = 0;
+
+        c = buf + len - slen + 1;
+        while (c < buf + len)
+        {
             if (*c == str[ctx->parse_pos]) {
                 if (ctx->state == PRE_HEAD) {
                     ctx->state = PARSE_HEAD;
@@ -279,48 +431,24 @@
                 }
                 ctx->parse_pos++;
             }
-            else {
-                if (str[ctx->parse_pos] == '\0') {
-                    /* We want to split the bucket at the '<'. */
-                    ctx->bytes_parsed++;
-                    ctx->state = PARSE_DIRECTIVE;
-                    ctx->tag_length = 0;
-                    ctx->parse_pos = 0;
-                    ctx->tag_start_bucket = dptr;
-                    ctx->tag_start_index = c - buf;
-                    if (ctx->head_start_index > 0) {
-                        start_index = (c - buf) - ctx->head_start_index;
-                        apr_bucket_split(ctx->head_start_bucket, 
-                                         ctx->head_start_index);
-                        tmp_bkt = APR_BUCKET_NEXT(ctx->head_start_bucket);
-                        if (dptr == ctx->head_start_bucket) {
-                            ctx->tag_start_bucket = tmp_bkt;
-                            ctx->tag_start_index = start_index;
-                        }
-                        ctx->head_start_bucket = tmp_bkt;
-                        ctx->head_start_index = 0;
-                    }
-                    return ctx->head_start_bucket;
+            else if (ctx->parse_pos != 0) 
+            {
+                /* The reason for this, is that we need to make sure 
+                 * that we catch cases like <<!--#.  This makes the 
+                 * second check after the original check fails.
+                 * If parse_pos was already 0 then we already checked this.
+                 */
+                /* FIXME: Why? */
+                *do_cleanup = 1;
+                if (*c == str[0]) {
+                    ctx->parse_pos = 1;
+                    ctx->head_start_index = c - buf;
                 }
-                else if (ctx->parse_pos != 0) {
-                    /* The reason for this, is that we need to make sure 
-                     * that we catch cases like <<!--#.  This makes the 
-                     * second check after the original check fails.
-                     * If parse_pos was already 0 then we already checked this.
-                     */
-                    *do_cleanup = 1;
-                    if (*c == str[0]) {
-                        ctx->parse_pos = 1;
-                        ctx->state = PARSE_HEAD;
-                        ctx->head_start_bucket = dptr;
-                        ctx->head_start_index = c - buf;
-                    }
-                    else {
-                        ctx->parse_pos = 0;
-                        ctx->state = PRE_HEAD;
-                        ctx->head_start_bucket = NULL;
-                        ctx->head_start_index = 0;
-                    }
+                else {
+                    ctx->parse_pos = 0;
+                    ctx->state = PRE_HEAD;
+                    ctx->head_start_bucket = NULL;
+                    ctx->head_start_index = 0;
                 }
             }
             c++;
@@ -2970,6 +3098,10 @@
                                 apr_pool_t *ptemp, server_rec *s)
 {
     include_hash = apr_hash_make(p);
+
+    /* compile the pattern used by find_start_sequence */
+    bndm_compile(&start_seq_pat, STARTING_SEQUENCE, 
+                 sizeof(STARTING_SEQUENCE)-1); 
 
     ssi_pfn_register = APR_RETRIEVE_OPTIONAL_FN(ap_register_include_handler);
 


Re: [PATCH] Take 3 of mod_include patch...

Posted by Ian Holsman <ia...@cnet.com>.
john sachs wrote:

> i applied this patch and the mod_include test fails in the same spot as it has been.
> content file has:
> <!--#include file="extra/inc-extra1.shtml"-->+


Hi John.
This patch was not intended to fix this problem,
it was intended to speed up the 'finding' of the '<!--#' tag
itself.
errors you might find due to this patch is that some SSI tags are not
getting evaluated (with the SSI tag stil in the output)

a couple of testcase would be something like
handling a string "<!--#--><!--#x-->"
or "<<!--#"
..Ian


> 
> 'include file' with relative path to file not in same path as the file you are requesting.
> 
> causes segv.  here is stacktrace:
> 
> #0  0x80c4056 in ap_getparents (
>     name=0x816a840 "INTERNALLY GENERATED file-relative req") at util.c:488
> #1  0x80d327f in ap_process_request_internal (r=0x82720e4) at request.c:152
> #2  0x80d4bd6 in ap_sub_req_lookup_file (
>     new_file=0xbfffafc4 "extra/inc-extra1.shtml", r=0x8277194,
>     next_filter=0x8278294) at request.c:1688
> #3  0x807718f in handle_include (ctx=0x826d054, bb=0xbfffd470, r=0x8277194,
>     f=0x827826c, head_ptr=0x8277128, inserted_head=0xbfffd40c)
>     at mod_include.c:1033
> #4  0x807b182 in send_parsed_content (bb=0xbfffd470, r=0x8277194, f=0x827826c)
>     at mod_include.c:2840
> #5  0x807bb32 in includes_filter (f=0x827826c, b=0x82783f4)
>     at mod_include.c:3081
> #6  0x80c9db4 in ap_pass_brigade (next=0x827826c, bb=0x82783f4)
>     at util_filter.c:275
> #7  0x80d1b9b in default_handler (r=0x8277194) at core.c:3068
> #8  0x80bd501 in ap_run_handler (r=0x8277194) at config.c:185
> #9  0x80bdbcf in ap_invoke_handler (r=0x8277194) at config.c:344
> #10 0x80958b1 in ap_process_request (r=0x8277194) at http_request.c:286
> #11 0x80907e8 in ap_process_http_connection (c=0x820c3ac) at http_core.c:287
> #12 0x80c8015 in ap_run_process_connection (c=0x820c3ac) at connection.c:82
> #13 0x80c8240 in ap_process_connection (c=0x820c3ac) at connection.c:219
> #14 0x80bc03c in child_main (child_num_arg=0) at prefork.c:829
> #15 0x80bc19d in make_child (s=0x8196f4c, slot=0) at prefork.c:916
> #16 0x80bc223 in startup_children (number_to_start=1) at prefork.c:939
> #17 0x80bc6ce in ap_mpm_run (_pconf=0x8195804, plog=0x81cd9c4, s=0x8196f4c)
>     at prefork.c:1155
> #18 0x80c20bc in main (argc=6, argv=0xbffff664) at main.c:431
> #19 0x40105577 in __libc_start_main () from /lib/libc.so.6
> 
> -j
> 




Re: [PATCH] Take 3 of mod_include patch...

Posted by Justin Erenkrantz <je...@ebuilt.com>.
On Wed, Sep 05, 2001 at 06:42:10PM -0700, john sachs wrote:
> i applied this patch and the mod_include test fails in the same spot as it has been.
> content file has:
> <!--#include file="extra/inc-extra1.shtml"-->
> 
> 'include file' with relative path to file not in same path as the file you are requesting.

Well, in my defense, my patch wasn't supposed to fix *that*.

But, while I'm in there, I'll take a shot at trying to fix this as
OtherBill hasn't had a chance to yet.  =-)  -- justin


[PATCH] mod_include fix

Posted by Ryan Bloom <rb...@covalent.net>.
Well, I finally made myself look at this and fix it.  This passed the httpd-test
cases now, and it looks correct to me.  I would like to apply this ASAP.

I dislike that we have to pstrdup the "" string, but we try to modify that string too
often to use a constant string.

Ryan

Index: server/request.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/request.c,v
retrieving revision 1.48
diff -u -d -b -w -u -r1.48 request.c
--- server/request.c	2001/09/01 05:21:16	1.48
+++ server/request.c	2001/09/06 16:21:33
@@ -1670,7 +1670,7 @@
          * but it's actually sometimes to impossible to do it... because the
          * file may not have a uri associated with it -djg
          */
-        rnew->uri = "INTERNALLY GENERATED file-relative req";
+        rnew->uri = apr_pstrdup(rnew->pool, "");
 
 #if 0 /* XXX When this is reenabled, the cache triggers need to be set to faux
        * dir_walk/file_walk values.
Index: server/util.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/util.c,v
retrieving revision 1.112
diff -u -d -b -w -u -r1.112 util.c
--- server/util.c	2001/08/24 01:41:56	1.112
+++ server/util.c	2001/09/06 16:21:33
@@ -615,7 +619,7 @@
     int l;
 
     if (last_slash == NULL) {
-	return NULL;
+	return apr_pstrdup(p, "");
     }
     l = (last_slash - s) + 1;
     d = apr_palloc(p, l + 1);

______________________________________________________________
Ryan Bloom				rbb@apache.org
Covalent Technologies			rbb@covalent.net
--------------------------------------------------------------

Re: [PATCH] Take 3 of mod_include patch...

Posted by Brian Pane <bp...@pacbell.net>.
Ryan Bloom wrote:

>>>I have some big problems with the way that location walk and directory
>>>walk work now, BTW, because if I write a module that doesn't get pages
>>>from the filesystem, I have to catch those in the map_to_storage hook,
>>>or the server will 500.
>>>
>>Hmm... I'd have thought that was the whole point of the map_to_storage
>>hook, if its name were any indication...  <shrug>
>>
>
>It is, but if I am just putting together a quick module, to solve a problem and
>it generates the page itself, all I should have to do, is create a handler, like
>I did in 1.3.  If I also have to create a map_to_storage hook, then we will
>have broken a lot of 1.3 modules when they try to port, and I can't see a good
>reason for that.
>
>The map_to_storage hook should be an optimization that I want to use, not
>a requirement that I HAVE to use.
>
I disagree on this point.  For people who put their document root on an 
NFS server,
doing stats of the nonexistent file corresponding to your module's URI 
will be a
performance problem.  In my experience, it's a big enough performance 
problem that
the optimization shouldn't be considered optional.

--Brian



Re: [PATCH] Take 3 of mod_include patch...

Posted by Ryan Bloom <rb...@covalent.net>.
On Thursday 06 September 2001 11:54, Bill Stoddard wrote:
> > On Thursday 06 September 2001 09:36, Ian Holsman wrote:
> > > On Thu, 2001-09-06 at 08:12, Ryan Bloom wrote:
> > > > > > I have some big problems with the way that location walk and
> > > > > > directory walk work now, BTW, because if I write a module that
> > > > > > doesn't get pages from the filesystem, I have to catch those in
> > > > > > the map_to_storage hook, or the server will 500.
> > > > >
> > > > > Hmm... I'd have thought that was the whole point of the
> > > > > map_to_storage hook, if its name were any indication...  <shrug>
> > > >
> > > > It is, but if I am just putting together a quick module, to solve a
> > > > problem and it generates the page itself, all I should have to do, is
> > > > create a handler, like I did in 1.3.  If I also have to create a
> > > > map_to_storage hook, then we will have broken a lot of 1.3 modules
> > > > when they try to port, and I can't see a good reason for that.
> > > >
> > > > The map_to_storage hook should be an optimization that I want to use,
> > > > not a requirement that I HAVE to use.
> > >
> > > you don't need to use it.
> > > look at mod-status/mod-info
> > > both requests aren't handled by the serving a file from the
> > > file-system. they >could< write a map-to-storage hook which would
> > > return 'OK' if the handler is status/info.
> > > but at the moment they don't and they let it go through to the default
> > > map_to_storage hook implementations, directory_walk and file_walk.
> > >
> > > I'm not seeing why a module developer would need to care about
> > > map_to_storage unless they want serve the file from say proxy,
> > > or a oracle DB for instance.
> >
> > Neither of those modules is trying to translate the name though. Mod_jk,
> > and I'm assuming mod_webapp, both do name translation, but not
> > map_to_storage. I should be able to completely forget map_to_storage
> > exists if I want to.
>
> What kind of name translation is mod_jk attempting to do?  If I recall
> correctly, mod_jk sets r->filename to NULL -during the name translation
> phase- if it determines it will handle the request.  Setting r->filename =
> NULL during translation is a trick specificially designed to prevent
> directory_walk from running on a URL that mod_jk -knows- is not in the file
> system. The correct way to implement this performance optimization in
> mod_jk for httpd 2.0 is to -replace- the translation phase hook with a
> map_to_storage hook.  mod_jk makes the determination about whether it is
> going to handle the request in map_to_storage. If mod_jk can handle the
> request (content is a servlet), then it returns OK on the map_to_storage
> call which prevents all the other map_to_storage hooks from running.

Yep, which is what Bill and I did last night.  I don't like forcing modules to
do this though.  If they want the optimization, then they need to use
map_to_storage.

Ryan

______________________________________________________________
Ryan Bloom				rbb@apache.org
Covalent Technologies			rbb@covalent.net
--------------------------------------------------------------

Re: [PATCH] Take 3 of mod_include patch...

Posted by Bill Stoddard <bi...@wstoddard.com>.
> On Thursday 06 September 2001 09:36, Ian Holsman wrote:
> > On Thu, 2001-09-06 at 08:12, Ryan Bloom wrote:
> > > > > I have some big problems with the way that location walk and
> > > > > directory walk work now, BTW, because if I write a module that
> > > > > doesn't get pages from the filesystem, I have to catch those in the
> > > > > map_to_storage hook, or the server will 500.
> > > >
> > > > Hmm... I'd have thought that was the whole point of the map_to_storage
> > > > hook, if its name were any indication...  <shrug>
> > >
> > > It is, but if I am just putting together a quick module, to solve a
> > > problem and it generates the page itself, all I should have to do, is
> > > create a handler, like I did in 1.3.  If I also have to create a
> > > map_to_storage hook, then we will have broken a lot of 1.3 modules when
> > > they try to port, and I can't see a good reason for that.
> > >
> > > The map_to_storage hook should be an optimization that I want to use, not
> > > a requirement that I HAVE to use.
> >
> > you don't need to use it.
> > look at mod-status/mod-info
> > both requests aren't handled by the serving a file from the file-system.
> > they >could< write a map-to-storage hook which would return 'OK' if the
> > handler is status/info.
> > but at the moment they don't and they let it go through to the default
> > map_to_storage hook implementations, directory_walk and file_walk.
> >
> > I'm not seeing why a module developer would need to care about
> > map_to_storage unless they want serve the file from say proxy,
> > or a oracle DB for instance.
>
> Neither of those modules is trying to translate the name though. Mod_jk, and I'm
> assuming mod_webapp, both do name translation, but not map_to_storage.
> I should be able to completely forget map_to_storage exists if I want to.
>

What kind of name translation is mod_jk attempting to do?  If I recall correctly, mod_jk
sets r->filename to NULL -during the name translation phase- if it determines it will
handle the request.  Setting r->filename = NULL during translation is a trick
specificially designed to prevent directory_walk from running on a URL that mod_jk -knows-
is not in the file system. The correct way to implement this performance optimization in
mod_jk for httpd 2.0 is to -replace- the translation phase hook with a map_to_storage
hook.  mod_jk makes the determination about whether it is going to handle the request in
map_to_storage. If mod_jk can handle the request (content is a servlet), then it returns
OK on the map_to_storage call which prevents all the other map_to_storage hooks from
running.

Bill

Bill


Re: [PATCH] Take 3 of mod_include patch...

Posted by Ryan Bloom <rb...@covalent.net>.
On Thursday 06 September 2001 09:36, Ian Holsman wrote:
> On Thu, 2001-09-06 at 08:12, Ryan Bloom wrote:
> > > > I have some big problems with the way that location walk and
> > > > directory walk work now, BTW, because if I write a module that
> > > > doesn't get pages from the filesystem, I have to catch those in the
> > > > map_to_storage hook, or the server will 500.
> > >
> > > Hmm... I'd have thought that was the whole point of the map_to_storage
> > > hook, if its name were any indication...  <shrug>
> >
> > It is, but if I am just putting together a quick module, to solve a
> > problem and it generates the page itself, all I should have to do, is
> > create a handler, like I did in 1.3.  If I also have to create a
> > map_to_storage hook, then we will have broken a lot of 1.3 modules when
> > they try to port, and I can't see a good reason for that.
> >
> > The map_to_storage hook should be an optimization that I want to use, not
> > a requirement that I HAVE to use.
>
> you don't need to use it.
> look at mod-status/mod-info
> both requests aren't handled by the serving a file from the file-system.
> they >could< write a map-to-storage hook which would return 'OK' if the
> handler is status/info.
> but at the moment they don't and they let it go through to the default
> map_to_storage hook implementations, directory_walk and file_walk.
>
> I'm not seeing why a module developer would need to care about
> map_to_storage unless they want serve the file from say proxy,
> or a oracle DB for instance.

Neither of those modules is trying to translate the name though. Mod_jk, and I'm
assuming mod_webapp, both do name translation, but not map_to_storage.
I should be able to completely forget map_to_storage exists if I want to.

Ryan
______________________________________________________________
Ryan Bloom				rbb@apache.org
Covalent Technologies			rbb@covalent.net
--------------------------------------------------------------

Re: [PATCH] Take 3 of mod_include patch...

Posted by "William A. Rowe, Jr." <wr...@rowe-clan.net>.
From: "Ian Holsman" <ia...@cnet.com>
Sent: Thursday, September 06, 2001 9:36 AM


> On Thu, 2001-09-06 at 08:12, Ryan Bloom wrote:
> > 
> > The map_to_storage hook should be an optimization that I want to use, not
> > a requirement that I HAVE to use.
> > 
>
> you don't need to use it.  look at mod-status/mod-info 
> both requests aren't handled by the serving a file from the file-system.
> they >could< write a map-to-storage hook which would return 'OK' if the
> handler is status/info. 
> but at the moment they don't and they let it go through to the default
> map_to_storage hook implementations, directory_walk and file_walk.

You read this rightly.

> I'm not seeing why a module developer would need to care about
> map_to_storage unless they want serve the file from say proxy, 
> or a oracle DB for instance.

Performance :)


Re: [PATCH] Take 3 of mod_include patch...

Posted by Ian Holsman <ia...@cnet.com>.
On Thu, 2001-09-06 at 08:12, Ryan Bloom wrote:
> 
> > > I have some big problems with the way that location walk and directory
> > > walk work now, BTW, because if I write a module that doesn't get pages
> > > from the filesystem, I have to catch those in the map_to_storage hook,
> > > or the server will 500.
> >
> > Hmm... I'd have thought that was the whole point of the map_to_storage
> > hook, if its name were any indication...  <shrug>
> 
> It is, but if I am just putting together a quick module, to solve a problem and
> it generates the page itself, all I should have to do, is create a handler, like
> I did in 1.3.  If I also have to create a map_to_storage hook, then we will
> have broken a lot of 1.3 modules when they try to port, and I can't see a good
> reason for that.
> 
> The map_to_storage hook should be an optimization that I want to use, not
> a requirement that I HAVE to use.
> 
you don't need to use it.
look at mod-status/mod-info 
both requests aren't handled by the serving a file from the file-system.
they >could< write a map-to-storage hook which would return 'OK' if the
handler is status/info. 
but at the moment they don't and they let it go through to the default
map_to_storage hook implementations, directory_walk and file_walk.

I'm not seeing why a module developer would need to care about
map_to_storage unless they want serve the file from say proxy, 
or a oracle DB for instance.

..Ian
> Ryan
> 
> ______________________________________________________________
> Ryan Bloom				rbb@apache.org
> Covalent Technologies			rbb@covalent.net
> --------------------------------------------------------------
-- 
Ian Holsman          IanH@cnet.com
Performance Measurement & Analysis
CNET Networks   -   (415) 364-8608


Re: [PATCH] Take 3 of mod_include patch...

Posted by Cliff Woolley <cl...@yahoo.com>.
On Thu, 6 Sep 2001, Ryan Bloom wrote:

> > Hmm... I'd have thought that was the whole point of the map_to_storage
> > hook, if its name were any indication...  <shrug>
>
> It is, but if I am just putting together a quick module, to solve a
> problem and it generates the page itself, all I should have to do, is
> create a handler, like I did in 1.3.  If I also have to create a
> map_to_storage hook, then we will have broken a lot of 1.3 modules
> when they try to port, and I can't see a good reason for that.
>
> The map_to_storage hook should be an optimization that I want to use,
> not a requirement that I HAVE to use.

Ahh, yes, well that makes sense.  I agree.

--Cliff

--------------------------------------------------------------
   Cliff Woolley
   cliffwoolley@yahoo.com
   Charlottesville, VA



Re: [PATCH] Take 3 of mod_include patch...

Posted by Ryan Bloom <rb...@covalent.net>.
> > I have some big problems with the way that location walk and directory
> > walk work now, BTW, because if I write a module that doesn't get pages
> > from the filesystem, I have to catch those in the map_to_storage hook,
> > or the server will 500.
>
> Hmm... I'd have thought that was the whole point of the map_to_storage
> hook, if its name were any indication...  <shrug>

It is, but if I am just putting together a quick module, to solve a problem and
it generates the page itself, all I should have to do, is create a handler, like
I did in 1.3.  If I also have to create a map_to_storage hook, then we will
have broken a lot of 1.3 modules when they try to port, and I can't see a good
reason for that.

The map_to_storage hook should be an optimization that I want to use, not
a requirement that I HAVE to use.

Ryan

______________________________________________________________
Ryan Bloom				rbb@apache.org
Covalent Technologies			rbb@covalent.net
--------------------------------------------------------------

Re: [PATCH] Take 3 of mod_include patch...

Posted by Cliff Woolley <cl...@yahoo.com>.
On Thu, 6 Sep 2001, Ryan Bloom wrote:

> I actually hit this same seg fault in mod_jk late last night.  If
> mod_include is changed to fix this, then we are most likely doing it
> wrong, and I will veto that.

My point exactly.

> The first step is to set r->uri to NULL if it is INTERNALLY GENERATED.
> Then, we have to figure out where that is seg faulting, and fix that
> location.

Yep.

> I have some big problems with the way that location walk and directory
> walk work now, BTW, because if I write a module that doesn't get pages
> from the filesystem, I have to catch those in the map_to_storage hook,
> or the server will 500.

Hmm... I'd have thought that was the whole point of the map_to_storage
hook, if its name were any indication...  <shrug>

--Cliff

--------------------------------------------------------------
   Cliff Woolley
   cliffwoolley@yahoo.com
   Charlottesville, VA



Re: [PATCH] Take 3 of mod_include patch...

Posted by Ryan Bloom <rb...@covalent.net>.
On Wednesday 05 September 2001 20:24, Cliff Woolley wrote:
> On Wed, 5 Sep 2001, john sachs wrote:
> > i applied this patch and the mod_include test fails in the same spot
> > as it has been. content file has: <!--#include
> > file="extra/inc-extra1.shtml"-->
> >
> > 'include file' with relative path to file not in same path as the file
> > you are requesting.
>
> Yep.  This patch has no effect on that.  It's probably not mod_include
> that needs patching, but rather the core.  I was hoping OtherBill could
> enlighten us on what the Right Way to fix it is... because I have very
> little idea myself.
>
> Last I heard, Bill suggested we just set r->uri=NULL in that case and just
> test for (r->uri) in all places we use r->uri, rather than setting r->uri
> to some bogus string.  I'm all for it, if we're convinced that that's the
> right thing to do.  I'll take a stab at patching it that way tomorrow just
> for grins.

I actually hit this same seg fault in mod_jk late last night.  If mod_include is
changed to fix this, then we are most likely doing it wrong, and I will veto that.
The first step is to set r->uri to NULL if it is INTERNALLY GENERATED.  Then,
we have to figure out where that is seg faulting, and fix that location.  I have
some big problems with the way that location walk and directory walk work
now, BTW, because if I write a module that doesn't get pages from the
filesystem, I have to catch those in the map_to_storage hook, or the server
will 500.

Ryan

______________________________________________________________
Ryan Bloom				rbb@apache.org
Covalent Technologies			rbb@covalent.net
--------------------------------------------------------------

Re: [PATCH] Take 3 of mod_include patch...

Posted by Cliff Woolley <cl...@yahoo.com>.
On Wed, 5 Sep 2001, john sachs wrote:

> i applied this patch and the mod_include test fails in the same spot
> as it has been. content file has: <!--#include
> file="extra/inc-extra1.shtml"-->
>
> 'include file' with relative path to file not in same path as the file
> you are requesting.

Yep.  This patch has no effect on that.  It's probably not mod_include
that needs patching, but rather the core.  I was hoping OtherBill could
enlighten us on what the Right Way to fix it is... because I have very
little idea myself.

Last I heard, Bill suggested we just set r->uri=NULL in that case and just
test for (r->uri) in all places we use r->uri, rather than setting r->uri
to some bogus string.  I'm all for it, if we're convinced that that's the
right thing to do.  I'll take a stab at patching it that way tomorrow just
for grins.

--Cliff


Re: [PATCH] Take 3 of mod_include patch...

Posted by john sachs <js...@covalent.net>.
i applied this patch and the mod_include test fails in the same spot as it has been.
content file has:
<!--#include file="extra/inc-extra1.shtml"-->

'include file' with relative path to file not in same path as the file you are requesting.

causes segv.  here is stacktrace:

#0  0x80c4056 in ap_getparents (
    name=0x816a840 "INTERNALLY GENERATED file-relative req") at util.c:488
#1  0x80d327f in ap_process_request_internal (r=0x82720e4) at request.c:152
#2  0x80d4bd6 in ap_sub_req_lookup_file (
    new_file=0xbfffafc4 "extra/inc-extra1.shtml", r=0x8277194,
    next_filter=0x8278294) at request.c:1688
#3  0x807718f in handle_include (ctx=0x826d054, bb=0xbfffd470, r=0x8277194,
    f=0x827826c, head_ptr=0x8277128, inserted_head=0xbfffd40c)
    at mod_include.c:1033
#4  0x807b182 in send_parsed_content (bb=0xbfffd470, r=0x8277194, f=0x827826c)
    at mod_include.c:2840
#5  0x807bb32 in includes_filter (f=0x827826c, b=0x82783f4)
    at mod_include.c:3081
#6  0x80c9db4 in ap_pass_brigade (next=0x827826c, bb=0x82783f4)
    at util_filter.c:275
#7  0x80d1b9b in default_handler (r=0x8277194) at core.c:3068
#8  0x80bd501 in ap_run_handler (r=0x8277194) at config.c:185
#9  0x80bdbcf in ap_invoke_handler (r=0x8277194) at config.c:344
#10 0x80958b1 in ap_process_request (r=0x8277194) at http_request.c:286
#11 0x80907e8 in ap_process_http_connection (c=0x820c3ac) at http_core.c:287
#12 0x80c8015 in ap_run_process_connection (c=0x820c3ac) at connection.c:82
#13 0x80c8240 in ap_process_connection (c=0x820c3ac) at connection.c:219
#14 0x80bc03c in child_main (child_num_arg=0) at prefork.c:829
#15 0x80bc19d in make_child (s=0x8196f4c, slot=0) at prefork.c:916
#16 0x80bc223 in startup_children (number_to_start=1) at prefork.c:939
#17 0x80bc6ce in ap_mpm_run (_pconf=0x8195804, plog=0x81cd9c4, s=0x8196f4c)
    at prefork.c:1155
#18 0x80c20bc in main (argc=6, argv=0xbffff664) at main.c:431
#19 0x40105577 in __libc_start_main () from /lib/libc.so.6

-j

Re: [PATCH] Take 3 of mod_include patch...

Posted by Ian Holsman <ia...@cnet.com>.
On Wed, 2001-09-05 at 19:11, Justin Erenkrantz wrote:
> On Wed, Sep 05, 2001 at 06:46:45PM -0700, Brian Pane wrote:
> > Justin Erenkrantz wrote:
> > [...]

getting back to the original patch
to find_start_sequence.

does anyone have any comments on this?
I'm seeing lower CPU utilization when using this.
and the edge cases are handled by the old 'slower' method
..Ian
> > 
> > >+/* Implements the BNDM search algorithm (as described above).
> > >+ *
> > >+ * n  - the pattern to search for
> > >+ * nl - length of the pattern to search for
> > >+ * h  - the string to look in
> > >+ * hl - length of the string to look for
> > >+ * t  - precompiled bndm structure against the pattern 
> > >+ *
> > >+ * Returns the count of character that is the first match or hl if no
> > >+ * match is found.
> > >+ */
> > >+static int bndm(const char *n, apr_size_t nl, const char *h, apr_size_t hl, 
> > >+                bndm_t *t)
> > >+{
> > >+    apr_size_t skip;
> > >+    const char *he, *p, *pi;
> > >+    unsigned int *T, x, d;
> > >+
> > >+    he = h + hl;
> > >+
> > >+    T = t->T;
> > >+    x = t->x << 1;
> > >+
> > >+    pi = h - 1; /* pi: p initial */
> > >+    p = pi + nl; /* compare window right to left. point to the first char */
> > >+
> > >+    do {
> > >+        skip = nl;
> > >+        d = x;
> > >+        do {
> > >+            d = (d >> 1) & T[(unsigned char) *p--];
> > >+            if ((d & 1)) {
> > >+                if (p != pi)
> > >+                    skip = p - pi;
> > >+                else
> > >+                    return p - h + 1;
> > >+            }
> > >+        } while (d > 1);
> > >+        p = (pi += skip) + nl; 
> > >+    } while (p < he);
> > >
> > If nl > hl (e.g., if the caller tries to search for a 5-byte pattern in
> > a 3-byte string), the first loop iteration will look at memory beyond
> > the end of the string.  Should this be a while loop instead of do-while?
> > (Or is the caller responsible for avoiding this case?)
> 
> Good point.  I'll make it a while loop instead.  If we start out past
> the end of h, we know we don't have a match.  For mod_include, the
> edge cases should pick up if that is part of a tag spanning buckets.
> -- justin
-- 
Ian Holsman          IanH@cnet.com
Performance Measurement & Analysis
CNET Networks   -   (415) 364-8608


Re: [PATCH] Take 3 of mod_include patch...

Posted by Brian Pane <bp...@pacbell.net>.
Justin Erenkrantz wrote:
[...]

>Actually, I think the conditional should be:
>
>while (p <= he)
>
>Thoughts?  We're scanning R->L, so p points to the end of the string.
>It is possible to have "<!--#" as n (which should match).  -- justin
>
I think (p < he) is still the right conditional; 'he' points to
the address right after the end of the string to be scanned
(because of the initialization he = h + hl).

--Brian





Re: [PATCH] Take 3 of mod_include patch...

Posted by Justin Erenkrantz <je...@ebuilt.com>.
On Wed, Sep 05, 2001 at 07:15:13PM -0700, Justin Erenkrantz wrote:
> Actually, I think the conditional should be:
> 
> while (p <= he)
> 
> Thoughts?  We're scanning R->L, so p points to the end of the string.
> It is possible to have "<!--#" as n (which should match).  -- justin

No.  I'm wrong.  I'll shut up now.  

It should be while (p < he).  he points past the end of the string.
-- justin


Re: [PATCH] Take 3 of mod_include patch...

Posted by Justin Erenkrantz <je...@ebuilt.com>.
On Wed, Sep 05, 2001 at 07:11:37PM -0700, Justin Erenkrantz wrote:
> On Wed, Sep 05, 2001 at 06:46:45PM -0700, Brian Pane wrote:
> > Justin Erenkrantz wrote:
> > [...]
> > 
> > >+/* Implements the BNDM search algorithm (as described above).
> > >+ *
> > >+ * n  - the pattern to search for
> > >+ * nl - length of the pattern to search for
> > >+ * h  - the string to look in
> > >+ * hl - length of the string to look for
> > >+ * t  - precompiled bndm structure against the pattern 
> > >+ *
> > >+ * Returns the count of character that is the first match or hl if no
> > >+ * match is found.
> > >+ */
> > >+static int bndm(const char *n, apr_size_t nl, const char *h, apr_size_t hl, 
> > >+                bndm_t *t)
> > >+{
> > >+    apr_size_t skip;
> > >+    const char *he, *p, *pi;
> > >+    unsigned int *T, x, d;
> > >+
> > >+    he = h + hl;
> > >+
> > >+    T = t->T;
> > >+    x = t->x << 1;
> > >+
> > >+    pi = h - 1; /* pi: p initial */
> > >+    p = pi + nl; /* compare window right to left. point to the first char */
> > >+
> > >+    do {
> > >+        skip = nl;
> > >+        d = x;
> > >+        do {
> > >+            d = (d >> 1) & T[(unsigned char) *p--];
> > >+            if ((d & 1)) {
> > >+                if (p != pi)
> > >+                    skip = p - pi;
> > >+                else
> > >+                    return p - h + 1;
> > >+            }
> > >+        } while (d > 1);
> > >+        p = (pi += skip) + nl; 
> > >+    } while (p < he);
> > >
> > If nl > hl (e.g., if the caller tries to search for a 5-byte pattern in
> > a 3-byte string), the first loop iteration will look at memory beyond
> > the end of the string.  Should this be a while loop instead of do-while?
> > (Or is the caller responsible for avoiding this case?)
> 
> Good point.  I'll make it a while loop instead.  If we start out past
> the end of h, we know we don't have a match.  For mod_include, the
> edge cases should pick up if that is part of a tag spanning buckets.

Actually, I think the conditional should be:

while (p <= he)

Thoughts?  We're scanning R->L, so p points to the end of the string.
It is possible to have "<!--#" as n (which should match).  -- justin


Re: [PATCH] Take 3 of mod_include patch...

Posted by Justin Erenkrantz <je...@ebuilt.com>.
On Wed, Sep 05, 2001 at 06:46:45PM -0700, Brian Pane wrote:
> Justin Erenkrantz wrote:
> [...]
> 
> >+/* Implements the BNDM search algorithm (as described above).
> >+ *
> >+ * n  - the pattern to search for
> >+ * nl - length of the pattern to search for
> >+ * h  - the string to look in
> >+ * hl - length of the string to look for
> >+ * t  - precompiled bndm structure against the pattern 
> >+ *
> >+ * Returns the count of character that is the first match or hl if no
> >+ * match is found.
> >+ */
> >+static int bndm(const char *n, apr_size_t nl, const char *h, apr_size_t hl, 
> >+                bndm_t *t)
> >+{
> >+    apr_size_t skip;
> >+    const char *he, *p, *pi;
> >+    unsigned int *T, x, d;
> >+
> >+    he = h + hl;
> >+
> >+    T = t->T;
> >+    x = t->x << 1;
> >+
> >+    pi = h - 1; /* pi: p initial */
> >+    p = pi + nl; /* compare window right to left. point to the first char */
> >+
> >+    do {
> >+        skip = nl;
> >+        d = x;
> >+        do {
> >+            d = (d >> 1) & T[(unsigned char) *p--];
> >+            if ((d & 1)) {
> >+                if (p != pi)
> >+                    skip = p - pi;
> >+                else
> >+                    return p - h + 1;
> >+            }
> >+        } while (d > 1);
> >+        p = (pi += skip) + nl; 
> >+    } while (p < he);
> >
> If nl > hl (e.g., if the caller tries to search for a 5-byte pattern in
> a 3-byte string), the first loop iteration will look at memory beyond
> the end of the string.  Should this be a while loop instead of do-while?
> (Or is the caller responsible for avoiding this case?)

Good point.  I'll make it a while loop instead.  If we start out past
the end of h, we know we don't have a match.  For mod_include, the
edge cases should pick up if that is part of a tag spanning buckets.
-- justin


Re: [PATCH] Take 3 of mod_include patch...

Posted by Brian Pane <bp...@pacbell.net>.
Justin Erenkrantz wrote:
[...]

>+/* Implements the BNDM search algorithm (as described above).
>+ *
>+ * n  - the pattern to search for
>+ * nl - length of the pattern to search for
>+ * h  - the string to look in
>+ * hl - length of the string to look for
>+ * t  - precompiled bndm structure against the pattern 
>+ *
>+ * Returns the count of character that is the first match or hl if no
>+ * match is found.
>+ */
>+static int bndm(const char *n, apr_size_t nl, const char *h, apr_size_t hl, 
>+                bndm_t *t)
>+{
>+    apr_size_t skip;
>+    const char *he, *p, *pi;
>+    unsigned int *T, x, d;
>+
>+    he = h + hl;
>+
>+    T = t->T;
>+    x = t->x << 1;
>+
>+    pi = h - 1; /* pi: p initial */
>+    p = pi + nl; /* compare window right to left. point to the first char */
>+
>+    do {
>+        skip = nl;
>+        d = x;
>+        do {
>+            d = (d >> 1) & T[(unsigned char) *p--];
>+            if ((d & 1)) {
>+                if (p != pi)
>+                    skip = p - pi;
>+                else
>+                    return p - h + 1;
>+            }
>+        } while (d > 1);
>+        p = (pi += skip) + nl; 
>+    } while (p < he);
>
If nl > hl (e.g., if the caller tries to search for a 5-byte pattern in
a 3-byte string), the first loop iteration will look at memory beyond
the end of the string.  Should this be a while loop instead of do-while?
(Or is the caller responsible for avoiding this case?)

--Brian