You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by ni...@apache.org on 2008/09/03 01:01:48 UTC

svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Author: niq
Date: Tue Sep  2 16:01:47 2008
New Revision: 691418

URL: http://svn.apache.org/viewvc?rev=691418&view=rev
Log:
Commit mod_sed: enable filtering of HTTP Requests and Responses through sed

Added:
    httpd/httpd/trunk/docs/manual/mod/mod_sed.xml
    httpd/httpd/trunk/modules/filters/libsed.h
    httpd/httpd/trunk/modules/filters/mod_sed.c
    httpd/httpd/trunk/modules/filters/regexp.c
    httpd/httpd/trunk/modules/filters/regexp.h
    httpd/httpd/trunk/modules/filters/sed.h
    httpd/httpd/trunk/modules/filters/sed0.c
    httpd/httpd/trunk/modules/filters/sed1.c
Modified:
    httpd/httpd/trunk/CHANGES
    httpd/httpd/trunk/modules/filters/config.m4

Modified: httpd/httpd/trunk/CHANGES
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/CHANGES?rev=691418&r1=691417&r2=691418&view=diff
==============================================================================
--- httpd/httpd/trunk/CHANGES [utf-8] (original)
+++ httpd/httpd/trunk/CHANGES [utf-8] Tue Sep  2 16:01:47 2008
@@ -2,6 +2,9 @@
 Changes with Apache 2.3.0
 [ When backported to 2.2.x, remove entry from this file ]
 
+  *) New module mod_sed: filter Request/Response bodies through sed
+     [Basant Kumar Kukreja <basant.kukreja sun.com>]
+
   *) mod_auth_form: Make sure that basic authentication is correctly
      faked directly after login. [Graham Leggett]
 

Added: httpd/httpd/trunk/docs/manual/mod/mod_sed.xml
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/docs/manual/mod/mod_sed.xml?rev=691418&view=auto
==============================================================================
--- httpd/httpd/trunk/docs/manual/mod/mod_sed.xml (added)
+++ httpd/httpd/trunk/docs/manual/mod/mod_sed.xml Tue Sep  2 16:01:47 2008
@@ -0,0 +1,141 @@
+<?xml version="1.0"?>
+<!DOCTYPE modulesynopsis SYSTEM "../style/modulesynopsis.dtd">
+<?xml-stylesheet type="text/xsl" href="../style/manual.en.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<modulesynopsis metafile="mod_sed.xml.meta">
+
+<name>mod_sed</name>
+<description>Filtering Input (request) and Output (response) content using sed commands</description>
+<status>Experimental</status>
+<sourcefile>mod_sed.c sed0.c sed1.c regexp.c regexp.h sed.h</sourcefile>
+<identifier>sed_module</identifier>
+<compatibility>Available in Apache 2.3 and later</compatibility>
+
+<summary>
+<p>
+mod_sed is an in-process content filter. The mod_sed filter implements the sed
+editing commands implemented by the Solaris 10 sed
+program as described in the <a href="http://docs.sun.com/app/docs/doc/816-5165/sed-1b?a=view">man
+page</a>. However, unlike sed, mod_sed doesn't take data from
+standard
+input. Instead, the filter acts on the entity data sent between client and
+server. mod_sed can be used as an input or output filter. mod_sed is a
+content filter, which means that it cannot be used to modify client or
+server http headers.
+</p>
+<p>
+The mod_sed output filter accepts a chunk of data, executes the sed scripts on the data, and generates the output, which is passed to the next filter in the filter chain.
+</p>
+
+<p>
+The mod_sed input filter reads the data from the next filter in the filter chain, executes the sed scripts, and returns the generated data to the caller filter in the filter chain.
+</p>
+
+<p>
+Both the input and output filters only process the data if a newline character is seen in the content. At the end of the data, the rest of the data is treated as the last line.
+</p>
+
+<p>A tutorial article on mod_sed, and why it is more powerful than simple
+string or regular expression search and replace, is available <a
+href="http://blogs.sun.com/basant/entry/using_mod_sed_to_filter">on
+the author's blog</a>.</p>
+
+</summary>
+
+<directivesynopsis>
+<name>OutputSed</name>
+<description>Sed command to filter the response content</description>
+<syntax>OutputSed <var>sed-command</var></syntax>
+<contextlist><context>directory</context><context>.htaccess</context>
+</contextlist>
+
+<usage>
+    <p>The <directive>OutputSed</directive> directive specifies the sed
+    command which will be executed on the response.
+    </p>
+</usage>
+</directivesynopsis>
+
+<directivesynopsis>
+<name>InputSed</name>
+<description>Sed command to filter the request data (typically post data)</description>
+<syntax>InputSed <var>sed-command</var></syntax>
+<contextlist><context>directory</context><context>.htaccess</context>
+</contextlist>
+
+<usage>
+    <p>The <directive>InputSed</directive> directive specifies the sed command
+    which will be executed on the request data, e.g. POST data.
+    </p>
+</usage>
+</directivesynopsis>
+
+<section id="sampleconf"><title>Sample Configuration</title>
+    <example><title>Adding an output filter </title>
+         # In the following example, the sed filter will replace the string <br />
+         # "monday" with "MON" and the string "sunday" with "SUN" in html documents <br />
+         # before sending them to the client. <br />
+        <indent>
+        &lt;Directory "/var/www/docs/sed"&gt; <br />
+           <indent>
+           AddOutputFilter Sed html <br />
+           OutputSed "s/monday/MON/g" <br />
+           OutputSed "s/sunday/SUN/g" <br />
+           </indent>
+        &lt;/Directory&gt; <br />
+        </indent>
+    </example>
+
+    <example><title>Adding an input filter </title>
+         # In the following example, the sed filter will replace the string <br />
+         # "monday" with "MON" and the string "sunday" with "SUN" in the POST data <br />
+         # sent to php <br />
+        <indent>
+        &lt;Directory "/var/www/docs/sed"&gt; <br />
+           <indent>
+           AddInputFilter Sed php <br />
+           InputSed "s/monday/MON/g" <br />
+           InputSed "s/sunday/SUN/g" <br />
+           </indent>
+        &lt;/Directory&gt; <br />
+        </indent>
+    </example>
+</section>
+<section id="sed_commands"><title>Sed Commands</title>
+    <p>
+    Complete details of the sed commands can be found in the
+   <a href="http://docs.sun.com/app/docs/doc/816-5165/sed-1b?a=view">sed man
+page</a>.
+    </p>
+    <dl>
+        <dt><code>b</code></dt>
+        <dd>branch to the label specified (Similar to goto)</dd>
+        <dt><code>h</code></dt>
+        <dd>Copy the current line to hold buffer.</dd>
+        <dt><code>H</code></dt>
+        <dd>Append the current line to hold buffer.</dd>
+        <dt><code>g</code></dt>
+        <dd>Copy the hold buffer into the current line</dd>
+        <dt><code>G</code></dt>
+        <dd>Append the hold buffer into the current line</dd>
+        <dt><code>x</code></dt>
+        <dd>Swap the content of hold buffer and current line</dd>
+    </dl>
+</section>
+</modulesynopsis>

Modified: httpd/httpd/trunk/modules/filters/config.m4
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/config.m4?rev=691418&r1=691417&r2=691418&view=diff
==============================================================================
--- httpd/httpd/trunk/modules/filters/config.m4 (original)
+++ httpd/httpd/trunk/modules/filters/config.m4 Tue Sep  2 16:01:47 2008
@@ -10,6 +10,9 @@
 APACHE_MODULE(filter, Smart Filtering, , , yes)
 APACHE_MODULE(substitute, response content rewrite-like filtering, , , most)
 
+sed_obj="mod_sed.lo sed0.lo sed1.lo regexp.lo"
+APACHE_MODULE(sed, filter request and/or response bodies through sed, $sed_obj)
+
 if test "$ac_cv_ebcdic" = "yes"; then
 # mod_charset_lite can be very useful on an ebcdic system,
 #   so include it by default

Added: httpd/httpd/trunk/modules/filters/libsed.h
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/libsed.h?rev=691418&view=auto
==============================================================================
--- httpd/httpd/trunk/modules/filters/libsed.h (added)
+++ httpd/httpd/trunk/modules/filters/libsed.h Tue Sep  2 16:01:47 2008
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
+ * Use is subject to license terms.
+ *
+ *	Copyright (c) 1984 AT&T
+ *	  All Rights Reserved  	
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *  http://www.apache.org/licenses/LICENSE-2.0. 
+ * 
+ * Unless required by applicable law or agreed to in writing, software 
+ * distributed under the License is distributed on an "AS IS" BASIS, 
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 
+ * or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+
+#ifndef LIBSED_H
+#define LIBSED_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <limits.h>
+
+#include "apr_file_io.h"
+#ifndef PATH_MAX
+#define PATH_MAX MAX_PATH
+#endif
+
+#define SED_NLINES 256
+#define SED_DEPTH 20
+#define SED_LABSIZE 50
+#define SED_ABUFSIZE 20
+
+typedef struct sed_reptr_s sed_reptr_t;
+
+/*
+ * Record type referenced by both sed_commands_s and sed_eval_s. Records can
+ * be chained through `next` and can refer to another record through `lb1`.
+ * NOTE(review): the field names suggest one compiled sed command (addresses
+ * ad1/ad2, regex re1, replacement rhs, command letter, g/p/negation flags);
+ * the code that fills them in is not part of this header -- confirm there.
+ */
+struct sed_reptr_s {
+    sed_reptr_t *next;
+    char        *ad1;
+    char        *ad2;
+    char        *re1;
+    sed_reptr_t *lb1;
+    char        *rhs;
+    int         findex;
+    char        command;
+    int         gfl;
+    char        pfl;
+    char        negfl;
+    int         nrep;
+};
+
+typedef struct sed_label_s sed_label_t;
+
+/*
+ * Element type of the label table (sed_commands_s.ltab holds SED_LABSIZE of
+ * these).
+ * NOTE(review): asc[9] presumably stores the label text (8 chars + NUL) and
+ * chain/address presumably point at the commands that use/define the label;
+ * confirm against the compiler sources.
+ */
+struct sed_label_s {
+    char        asc[9];
+    sed_reptr_t *chain;
+    sed_reptr_t *address;
+};
+
+typedef void (sed_err_fn_t)(void *data, const char *error);
+typedef void (sed_write_fn_t)(void *ctx, char *buf, int sz);
+
+typedef struct sed_commands_s sed_commands_t;
+#define NWFILES 11 /* 10 plus one for standard output */
+
+/*
+ * Script compilation state. Callers allocate the structure themselves and
+ * hand it to sed_init_commands(), sed_compile_string()/sed_compile_file()
+ * and sed_destroy_commands() declared below.
+ * The fixed-size arrays are dimensioned by SED_NLINES (tlno), NWFILES
+ * (fname), SED_LABSIZE (ltab) and SED_DEPTH (cmpend).
+ * NOTE(review): the individual fields are maintained by the compiler
+ * sources, which are not visible in this header; errfn/data/pool presumably
+ * mirror the arguments of sed_init_commands() -- confirm.
+ */
+struct sed_commands_s {
+    sed_err_fn_t *errfn;
+    void         *data;
+
+    unsigned     lsize;
+    char         *linebuf;
+    char         *lbend;
+    const char   *saveq;
+
+    char         *cp;
+    char         *lastre;
+    char         *respace;
+    char         sseof;
+    char         *reend;
+    const char   *earg;
+    int          eflag;
+    int          gflag;
+    int          nflag;
+    apr_int64_t  tlno[SED_NLINES];
+    int          nlno;
+    int          depth;
+
+    char         *fname[NWFILES];
+    int          nfiles;
+
+    sed_label_t  ltab[SED_LABSIZE];
+    sed_label_t  *labtab;
+    sed_label_t  *lab;
+    sed_label_t  *labend;
+
+    sed_reptr_t  **cmpend[SED_DEPTH];
+    sed_reptr_t  *ptrspace;
+    sed_reptr_t  *ptrend;
+    sed_reptr_t  *rep;
+    int          nrep;
+    apr_pool_t   *pool;
+    int          canbefinal;
+};
+
+typedef struct sed_eval_s sed_eval_t;
+
+/*
+ * Script evaluation state. Callers allocate the structure themselves and
+ * hand it to sed_init_eval(), sed_eval_buffer()/sed_eval_file(),
+ * sed_finalize_eval() and sed_destroy_eval() declared below.
+ * It refers to a compiled sed_commands_t through `commands` and carries three
+ * size/buffer groups (lsize/linebuf, hsize/holdbuf, gsize/genbuf).
+ * NOTE(review): the individual fields are maintained by the evaluator
+ * sources, which are not visible in this header; errfn/writefn/data/pool
+ * presumably mirror the arguments of sed_init_eval() -- confirm.
+ */
+struct sed_eval_s {
+    sed_err_fn_t   *errfn;
+    sed_write_fn_t *writefn;
+    void           *data;
+
+    sed_commands_t *commands;
+
+    apr_int64_t    lnum;
+    void           *fout;
+
+    unsigned       lsize;
+    char           *linebuf;
+    char           *lspend;
+
+    unsigned       hsize;
+    char           *holdbuf;
+    char           *hspend;
+
+    unsigned       gsize;
+    char           *genbuf;
+    char           *lcomend;
+
+    apr_file_t    *fcode[NWFILES];
+    sed_reptr_t    *abuf[SED_ABUFSIZE];
+    sed_reptr_t    **aptr;
+    sed_reptr_t    *pending;
+    unsigned char  *inar;
+    int            nrep;
+
+    int            dolflag;
+    int            sflag;
+    int            jflag;
+    int            delflag;
+    int            lreadyflag;
+    int            quitflag;
+    int            finalflag;
+    int            numpass;
+    int            nullmatch;
+    int            col;
+    apr_pool_t     *pool;
+};
+
+/*
+ * Compilation API. All functions operate on a caller-provided
+ * sed_commands_t; errfn has the sed_err_fn_t signature
+ * (void *data, const char *error).
+ */
+apr_status_t sed_init_commands(sed_commands_t *commands, sed_err_fn_t *errfn, void *data,
+                               apr_pool_t *p);
+apr_status_t sed_compile_string(sed_commands_t *commands, const char *s);
+apr_status_t sed_compile_file(sed_commands_t *commands, apr_file_t *fin);
+char* sed_get_finalize_error(const sed_commands_t *commands, apr_pool_t* pool);
+int sed_canbe_finalized(const sed_commands_t *commands);
+void sed_destroy_commands(sed_commands_t *commands);
+
+/*
+ * Evaluation API. All functions operate on a caller-provided sed_eval_t that
+ * is bound to a compiled sed_commands_t; writefn has the sed_write_fn_t
+ * signature (void *ctx, char *buf, int sz).
+ * NOTE(review): the fout/f arguments are opaque here; mod_sed.c passes its
+ * filter context and receives it back as the first argument of its write
+ * callback -- confirm in the evaluator sources.
+ */
+apr_status_t sed_init_eval(sed_eval_t *eval, sed_commands_t *commands,
+                           sed_err_fn_t *errfn, void *data,
+                           sed_write_fn_t *writefn, apr_pool_t *p);
+apr_status_t sed_reset_eval(sed_eval_t *eval, sed_commands_t *commands, sed_err_fn_t *errfn, void *data);
+apr_status_t sed_eval_buffer(sed_eval_t *eval, const char *buf, int bufsz, void *fout);
+apr_status_t sed_eval_file(sed_eval_t *eval, apr_file_t *fin, void *fout);
+apr_status_t sed_finalize_eval(sed_eval_t *eval, void *f);
+void sed_destroy_eval(sed_eval_t *eval);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBSED_H */

Added: httpd/httpd/trunk/modules/filters/mod_sed.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/mod_sed.c?rev=691418&view=auto
==============================================================================
--- httpd/httpd/trunk/modules/filters/mod_sed.c (added)
+++ httpd/httpd/trunk/modules/filters/mod_sed.c Tue Sep  2 16:01:47 2008
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
+ * Use is subject to license terms.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *  http://www.apache.org/licenses/LICENSE-2.0. 
+ * 
+ * Unless required by applicable law or agreed to in writing, software 
+ * distributed under the License is distributed on an "AS IS" BASIS, 
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 
+ * or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+
+#include "httpd.h"
+#include "http_config.h"
+#include "http_log.h"
+#include "apr_strings.h"
+#include "apr_general.h"
+#include "util_filter.h"
+#include "apr_buckets.h"
+#include "http_request.h"
+#include "libsed.h"
+
+static const char *sed_filter_name = "Sed";
+#define MODSED_OUTBUF_SIZE 4000
+
+/* Configuration for one direction (request or response): the compiled sed
+ * commands plus the last compilation error reported by libsed.
+ */
+typedef struct sed_expr_config
+{
+    /* NULL until compile_sed_expr() succeeds; reset to NULL on failure */
+    sed_commands_t *sed_cmds;
+    /* written by sed_compile_errf(), reported by sed_add_expr() */
+    const char *last_error;
+} sed_expr_config;
+
+/* Per-directory module configuration (see create_sed_dir_config): one
+ * sed_expr_config for the OutputSed directives and one for InputSed.
+ */
+typedef struct sed_config
+{
+    sed_expr_config output;   /* used by sed_response_filter */
+    sed_expr_config input;    /* used by sed_request_filter */
+} sed_config;
+
+/* Context for filter invocation for single HTTP request.
+ * Created by init_context() and stored in f->ctx.
+ */
+typedef struct sed_filter_ctxt
+{
+    sed_eval_t eval;          /* libsed evaluation state */
+    request_rec *r;           /* request being filtered */
+    apr_bucket_brigade *bb;   /* brigade receiving the generated output */
+    char *outbuf;             /* start of the staging buffer for sed output */
+    char *curoutbuf;          /* next free byte inside outbuf */
+    int bufsize;              /* set to MODSED_OUTBUF_SIZE by init_context() */
+} sed_filter_ctxt;
+
+module AP_MODULE_DECLARE_DATA sed_module;
+
+/* This function will be call back from libsed functions if there is any error
+ * happend during execution of sed scripts
+ */
+static void log_sed_errf(void *data, const char *error)
+{
+    request_rec *r = (request_rec *) data;
+    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, error);
+}
+
+/* This function will be call back from libsed functions if there is any
+ * compilation error.
+ */
+static void sed_compile_errf(void *data, const char *error)
+{
+    sed_expr_config *sed_cfg = (sed_expr_config *) data;
+    sed_cfg->last_error = error;
+}
+
+/*
+ * flush_output_buffer
+ * Move the staged output (ctx->outbuf up to ctx->curoutbuf), followed by the
+ * optional extra bytes buf[0..sz), into one pool bucket appended to ctx->bb.
+ * Does nothing when there is nothing to emit.
+ */
+static void flush_output_buffer(sed_filter_ctxt *ctx, char* buf, int sz)
+{
+    int pending = ctx->curoutbuf - ctx->outbuf;
+    int total = pending + sz;
+    char *merged;
+    apr_bucket *bkt;
+
+    if (total <= 0) {
+        return;
+    }
+
+    merged = apr_palloc(ctx->r->pool, total);
+    if (pending != 0) {
+        memcpy(merged, ctx->outbuf, pending);
+    }
+    if ((buf != NULL) && (sz > 0)) {
+        memcpy(merged + pending, buf, sz);
+    }
+
+    /* The staging buffer is empty again */
+    ctx->curoutbuf = ctx->outbuf;
+
+    bkt = apr_bucket_pool_create(merged, total, ctx->r->pool,
+                                 ctx->r->connection->bucket_alloc);
+    APR_BRIGADE_INSERT_TAIL(ctx->bb, bkt);
+}
+
+/* Write callback registered with libsed by init_context(). libsed calls it
+ * whenever it has output; dummy is the sed_filter_ctxt that was passed to
+ * sed_eval_buffer / sed_finalize_eval.
+ *
+ * Small writes are staged in ctx->outbuf; a write that would fill the
+ * staging buffer is flushed to the brigade together with the staged bytes.
+ */
+static void sed_write_output(void *dummy, char *buf, int sz)
+{
+    sed_filter_ctxt *ctx = (sed_filter_ctxt *) dummy;
+
+    if (((ctx->curoutbuf - ctx->outbuf) + sz) < ctx->bufsize) {
+        /* still fits: just stage it */
+        memcpy(ctx->curoutbuf, buf, sz);
+        ctx->curoutbuf += sz;
+        return;
+    }
+
+    /* staged data plus this chunk go out as one bucket */
+    flush_output_buffer(ctx, buf, sz);
+}
+
+/* Compile one sed expression into sed_cfg->sed_cmds.
+ * The command set is created lazily from cmd->pool on the first call, with
+ * sed_compile_errf as error callback. On any failure the command set is
+ * destroyed (and sed_cfg->sed_cmds left/reset to NULL) and the libsed status
+ * is returned.
+ */
+static apr_status_t compile_sed_expr(sed_expr_config *sed_cfg,
+                                     cmd_parms *cmd,
+                                     const char *expr)
+{
+    apr_status_t rv;
+
+    if (sed_cfg->sed_cmds == NULL) {
+        sed_commands_t *fresh = apr_pcalloc(cmd->pool, sizeof(*fresh));
+
+        rv = sed_init_commands(fresh, sed_compile_errf, sed_cfg, cmd->pool);
+        if (rv != APR_SUCCESS) {
+            sed_destroy_commands(fresh);
+            return rv;
+        }
+        sed_cfg->sed_cmds = fresh;
+    }
+
+    rv = sed_compile_string(sed_cfg->sed_cmds, expr);
+    if (rv == APR_SUCCESS) {
+        return APR_SUCCESS;
+    }
+
+    sed_destroy_commands(sed_cfg->sed_cmds);
+    sed_cfg->sed_cmds = NULL;
+    return rv;
+}
+
+/* Pool cleanup registered by init_context(): tears down the sed_eval_t
+ * passed as data. Always reports success.
+ */
+static apr_status_t sed_eval_cleanup(void *data)
+{
+    sed_destroy_eval((sed_eval_t *) data);
+    return APR_SUCCESS;
+}
+
+/* Build the per-request filter context and, on success, publish it in
+ * f->ctx. libsed is set up to emit output through sed_write_output and to
+ * report errors through log_sed_errf. Everything is allocated from the
+ * request pool, and a cleanup is registered there to destroy the evaluator.
+ * Returns the status of sed_init_eval; f->ctx is untouched on failure.
+ */
+static apr_status_t init_context(ap_filter_t *f, sed_expr_config *sed_cfg)
+{
+    request_rec *req = f->r;
+    sed_filter_ctxt *fresh = apr_pcalloc(req->pool, sizeof(*fresh));
+    apr_status_t rv;
+
+    fresh->r = req;
+    fresh->bb = NULL;
+
+    rv = sed_init_eval(&fresh->eval, sed_cfg->sed_cmds, log_sed_errf,
+                       req, &sed_write_output, req->pool);
+    if (rv == APR_SUCCESS) {
+        apr_pool_cleanup_register(req->pool, &fresh->eval, sed_eval_cleanup,
+                                  apr_pool_cleanup_null);
+
+        /* staging buffer for sed output */
+        fresh->bufsize = MODSED_OUTBUF_SIZE;
+        fresh->outbuf = apr_palloc(req->pool, fresh->bufsize + 1);
+        fresh->curoutbuf = fresh->outbuf;
+
+        f->ctx = fresh;
+    }
+    return rv;
+}
+
+/* Entry function for Sed output filter.
+ *
+ * f  - filter instance; f->ctx holds the sed_filter_ctxt once initialised
+ * bb - brigade handed over by the previous filter; it is emptied here
+ *
+ * Returns the status of the downstream ap_pass_brigade() call, or the first
+ * error reported by init_context / sed_eval_buffer / sed_finalize_eval.
+ */
+static apr_status_t sed_response_filter(ap_filter_t *f,
+                                        apr_bucket_brigade *bb)
+{
+    apr_bucket *b;
+    apr_status_t status;
+    sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
+                                           &sed_module);
+    sed_filter_ctxt *ctx = f->ctx;
+    sed_expr_config *sed_cfg = &cfg->output;
+
+    if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
+        /* No sed expressions */
+        ap_remove_output_filter(f);
+        return ap_pass_brigade(f->next, bb);
+    }
+
+    if (ctx == NULL) {
+
+        if (APR_BUCKET_IS_EOS(APR_BRIGADE_FIRST(bb))) {
+            /* no need to run sed filter for Head requests */
+            ap_remove_output_filter(f);
+            return ap_pass_brigade(f->next, bb);
+        }
+
+        status = init_context(f, sed_cfg);
+        if (status != APR_SUCCESS)
+             return status;
+        ctx = f->ctx;
+    }
+
+    ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
+
+    /* Here is the main logic. Iterate through all the buckets, read the
+     * content of the bucket, call sed_eval_buffer on the data.
+     * sed_eval_buffer will read the data line by line, run filters on each
+     * line. sed_eval_buffer will generate the output by calling
+     * sed_write_output which will add the output to ctx->bb. At the end of
+     * the loop, ctx->bb is passed to the next filter in chain. At the end of
+     * the data, if new line is not found then sed_eval_buffer will store the
+     * data in its own buffer.
+     *
+     * Once eos bucket is found then sed_finalize_eval will flush the rest of
+     * the data. If there is no new line in last line of data, new line is
+     * appended (that is a solaris sed behavior). libsed's internal memory for
+     * evaluation is allocated on request's pool so it will be cleared once
+     * request is over.
+     *
+     * If flush bucket is found then append the flush bucket to ctx->bb
+     * and pass it to next filter. There may be some data which will still be
+     * in sed's internal buffer which can't be flushed until new line
+     * character is arrived.
+     */
+    for (b = APR_BRIGADE_FIRST(bb); b != APR_BRIGADE_SENTINEL(bb);) {
+        const char *buf = NULL;
+        apr_size_t bytes = 0;
+        if (APR_BUCKET_IS_EOS(b)) {
+            apr_bucket *b1 = APR_BUCKET_NEXT(b);
+            /* Now clean up the internal sed buffer; a failure here means
+             * the tail of the response could not be produced, so report it
+             * like a sed_eval_buffer failure instead of ignoring it.
+             */
+            status = sed_finalize_eval(&ctx->eval, ctx);
+            if (status != APR_SUCCESS) {
+                return status;
+            }
+            flush_output_buffer(ctx, NULL, 0);
+            APR_BUCKET_REMOVE(b);
+            /* Insert the eos bucket to ctx->bb brigade */
+            APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
+            b = b1;
+        }
+        else if (APR_BUCKET_IS_FLUSH(b)) {
+            apr_bucket *b1 = APR_BUCKET_NEXT(b);
+            APR_BUCKET_REMOVE(b);
+            APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
+            status = ap_pass_brigade(f->next, ctx->bb);
+            apr_brigade_cleanup(ctx->bb);
+            if (status != APR_SUCCESS) {
+                return status;
+            }
+            b = b1;
+        }
+        else if (APR_BUCKET_IS_METADATA(b)) {
+            /* left in bb; destroyed by apr_brigade_cleanup(bb) below */
+            b = APR_BUCKET_NEXT(b);
+        }
+        else if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
+                 == APR_SUCCESS) {
+            apr_bucket *b1 = APR_BUCKET_NEXT(b);
+            status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
+            if (status != APR_SUCCESS) {
+                return status;
+            }
+            flush_output_buffer(ctx, NULL, 0);
+            /* apr_bucket_delete unlinks the bucket and destroys it */
+            apr_bucket_delete(b);
+            b = b1;
+        }
+        else {
+            /* Unreadable bucket: it is skipped as before, but it must be
+             * destroyed as well as unlinked, otherwise nothing ever releases
+             * it once it has left the brigade.
+             */
+            apr_bucket *b1 = APR_BUCKET_NEXT(b);
+            apr_bucket_delete(b);
+            b = b1;
+        }
+    }
+    apr_brigade_cleanup(bb);
+    return ap_pass_brigade(f->next, ctx->bb);
+}
+
+/* Entry function for Sed input filter.
+ *
+ * f         - filter instance; f->ctx holds the sed_filter_ctxt once set up
+ * bb        - brigade to fill for the caller
+ * mode      - only AP_MODE_READBYTES is filtered; other modes pass through
+ * block     - forwarded unchanged to the next filter
+ * readbytes - forwarded to the next filter and used to cap what is
+ *             returned in bb
+ *
+ * Returns APR_SUCCESS, or the first error reported by ap_get_brigade /
+ * init_context / sed_eval_buffer / sed_finalize_eval.
+ */
+static apr_status_t sed_request_filter(ap_filter_t *f,
+                                       apr_bucket_brigade *bb,
+                                       ap_input_mode_t mode,
+                                       apr_read_type_e block,
+                                       apr_off_t readbytes)
+{
+    sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
+                                           &sed_module);
+    sed_filter_ctxt *ctx = f->ctx;
+    apr_status_t status;
+    sed_expr_config *sed_cfg = &cfg->input;
+
+    if (mode != AP_MODE_READBYTES) {
+        return ap_get_brigade(f->next, bb, mode, block, readbytes);
+    }
+
+    if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
+        /* No sed expression */
+        return ap_get_brigade(f->next, bb, mode, block, readbytes);
+    }
+
+    if (!ctx) {
+        if (!ap_is_initial_req(f->r)) {
+            ap_remove_input_filter(f);
+            /* XXX : Should we filter the sub requests too */
+            return ap_get_brigade(f->next, bb, mode, block, readbytes);
+        }
+        status = init_context(f, sed_cfg);
+        if (status != APR_SUCCESS)
+             return status;
+        ctx = f->ctx;
+        ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
+    }
+
+    /* Here is the logic :
+     * Read the readbytes data from next level filter into bbinp. Loop through
+     * the buckets in bbinp and read the data from buckets and invoke
+     * sed_eval_buffer on the data. libsed will generate its output using
+     * sed_write_output which will add data in ctx->bb. Do it until there is
+     * at least one bucket in ctx->bb. At the end of data eos bucket
+     * should be there.
+     *
+     * Once eos bucket is seen, then invoke sed_finalize_eval to clear the
+     * output. If the last byte of data is not a new line character then sed
+     * will add a new line to the data that is default sed behaviour. Note
+     * that using this filter with POST data, caller may not expect this
+     * behaviour.
+     *
+     * If next level filter generate the flush bucket, we can't do much about
+     * it. If we want to return the flush bucket in brigade bb (to the caller)
+     * the question is where to add it?
+     */
+    while (APR_BRIGADE_EMPTY(ctx->bb)) {
+        apr_bucket_brigade *bbinp;
+        apr_bucket *b;
+
+        /* read the bytes from next level filter */
+        bbinp = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
+        status = ap_get_brigade(f->next, bbinp, mode, block, readbytes);
+        if (status != APR_SUCCESS) {
+            return status;
+        }
+        for (b = APR_BRIGADE_FIRST(bbinp); b != APR_BRIGADE_SENTINEL(bbinp);
+             b = APR_BUCKET_NEXT(b)) {
+            const char *buf = NULL;
+            /* initialised so a failed read never leaves it indeterminate */
+            apr_size_t bytes = 0;
+
+            if (APR_BUCKET_IS_EOS(b)) {
+                /* eos bucket. Clear the internal sed buffers; a failure
+                 * means the tail of the body could not be produced, so
+                 * report it like a sed_eval_buffer failure.
+                 */
+                status = sed_finalize_eval(&ctx->eval, ctx);
+                if (status != APR_SUCCESS)
+                    return status;
+                flush_output_buffer(ctx, NULL, 0);
+                APR_BUCKET_REMOVE(b);
+                APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
+                /* b now lives in ctx->bb: stop before the loop advances it */
+                break;
+            }
+            else if (APR_BUCKET_IS_FLUSH(b)) {
+                /* What should we do with flush bucket */
+                continue;
+            }
+            if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
+                     == APR_SUCCESS) {
+                status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
+                if (status != APR_SUCCESS)
+                    return status;
+                flush_output_buffer(ctx, NULL, 0);
+            }
+        }
+        apr_brigade_cleanup(bbinp);
+        apr_brigade_destroy(bbinp);
+    }
+
+    if (!APR_BRIGADE_EMPTY(ctx->bb)) {
+        apr_bucket_brigade *newbb = NULL;
+        apr_bucket *b = NULL;
+
+        /* This may return APR_INCOMPLETE which should be fine */
+        apr_brigade_partition(ctx->bb, readbytes, &b);
+
+        /* Hand the first readbytes to the caller, keep the rest in ctx->bb
+         * for the next invocation.
+         */
+        newbb = apr_brigade_split(ctx->bb, b);
+        APR_BRIGADE_CONCAT(bb, ctx->bb);
+        APR_BRIGADE_CONCAT(ctx->bb, newbb);
+    }
+    return APR_SUCCESS;
+}
+
+/* Handler shared by the OutputSed and InputSed directives.
+ * cmd->info carries the byte offset of the target sed_expr_config inside the
+ * sed_config passed as cfg; arg is the sed expression to compile.
+ * Returns NULL on success, otherwise an error message allocated from
+ * cmd->temp_pool.
+ */
+static const char *sed_add_expr(cmd_parms *cmd, void *cfg, const char *arg)
+{
+    int offset = (int) (long) cmd->info;
+    sed_expr_config *target = (sed_expr_config *) ((char *) cfg + offset);
+
+    if (compile_sed_expr(target, cmd, arg) == APR_SUCCESS) {
+        return NULL;
+    }
+    return apr_psprintf(cmd->temp_pool,
+                        "Failed to compile sed expression. %s",
+                        target->last_error);
+}
+
+/* Per-directory config creator: returns a zero-filled sed_config allocated
+ * from p, i.e. no sed commands configured for either direction.
+ */
+static void *create_sed_dir_config(apr_pool_t *p, char *s)
+{
+    return apr_pcalloc(p, sizeof(sed_config));
+}
+
+/* Directive table. Both directives take one argument and share the
+ * sed_add_expr handler; the info pointer carries the offset of the matching
+ * sed_expr_config member inside sed_config, which the handler uses to pick
+ * the output or input configuration.
+ */
+static const command_rec sed_filter_cmds[] = {
+    AP_INIT_TAKE1("OutputSed", sed_add_expr,
+                  (void *) APR_OFFSETOF(sed_config, output),
+                  ACCESS_CONF,
+                  "Sed regular expression for Response"),
+    AP_INIT_TAKE1("InputSed", sed_add_expr,
+                  (void *) APR_OFFSETOF(sed_config, input),
+                  ACCESS_CONF,
+                  "Sed regular expression for Request"),
+    {NULL}
+};
+
+/* Registers the response and request filters under the same filter name
+ * (sed_filter_name), both as AP_FTYPE_RESOURCE filters.
+ */
+static void register_hooks(apr_pool_t *p)
+{
+    const char *name = sed_filter_name;
+
+    ap_register_output_filter(name, sed_response_filter, NULL,
+                              AP_FTYPE_RESOURCE);
+    ap_register_input_filter(name, sed_request_filter, NULL,
+                             AP_FTYPE_RESOURCE);
+}
+
+/* Module definition: per-directory configuration only, no server-level
+ * configuration and no custom merge functions.
+ */
+module AP_MODULE_DECLARE_DATA sed_module = {
+    STANDARD20_MODULE_STUFF,
+    create_sed_dir_config,      /* dir config creator */
+    NULL,                       /* dir merger --- default is to override */
+    NULL,                       /* server config */
+    NULL,                       /* merge server config */
+    sed_filter_cmds,            /* command table */
+    register_hooks              /* register hooks */
+};

Added: httpd/httpd/trunk/modules/filters/regexp.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/regexp.c?rev=691418&view=auto
==============================================================================
--- httpd/httpd/trunk/modules/filters/regexp.c (added)
+++ httpd/httpd/trunk/modules/filters/regexp.c Tue Sep  2 16:01:47 2008
@@ -0,0 +1,601 @@
+/*
+ * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
+ * Use is subject to license terms.
+ *
+ *	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	
+ *	  All Rights Reserved  	
+ *
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *  http://www.apache.org/licenses/LICENSE-2.0. 
+ * 
+ * Unless required by applicable law or agreed to in writing, software 
+ * distributed under the License is distributed on an "AS IS" BASIS, 
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 
+ * or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+
+/* Code moved from regexp.h */
+
+#include "apr.h"
+#include "apr_lib.h"
+#ifdef APR_HAVE_LIMITS_H
+#include <limits.h>
+#endif
+#if APR_HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include "libsed.h"
+#include "regexp.h"
+#include "sed.h"
+
+/*
+ * Pattern-reader helpers.  They operate on a local `char *sp' that must be
+ * in scope at the point of use (sed_compile declares it).
+ */
+/* Fetch the next pattern byte as an unsigned value and advance sp. */
+#define GETC() ((unsigned char)*sp++)
+/* Look at the next pattern byte without consuming it. */
+#define PEEKC() ((unsigned char)*sp)
+/* Step sp back by one byte; the argument is ignored. */
+#define UNGETC(c) (--sp)
+/*
+ * Record error code c in the local `regerrno' and jump to the local label
+ * `out'.  Expands to a braced block (not do/while(0)), so a use followed by
+ * `;' is a block plus an empty statement.
+ */
+#define SEDCOMPILE_ERROR(c) { \
+            regerrno = c; \
+            goto out; \
+            }
+/* Non-zero when the first n bytes of s1 and s2 compare equal (strncmp). */
+#define ecmp(s1, s2, n)    (strncmp(s1, s2, n) == 0)
+/* Alphabetic character or underscore. */
+#define uletter(c) (isalpha(c) || c == '_')
+
+
+/* bittab[i] == 1 << i: single-bit masks used by PLACE()/ISTHERE(). */
+static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
+
+static int regerr(sed_commands_t *commands, int err);
+static void comperr(sed_commands_t *commands, char *msg);
+static void getrnge(char *str, step_vars_storage *vars);
+static int _advance(char *, char *, step_vars_storage *);
+extern int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars);
+
+
+/*
+ * Report a regexp compilation problem through command_errf().
+ * msg is used as the format string; the command line currently being
+ * compiled (commands->linebuf) is supplied as its single argument.
+ */
+static void comperr(sed_commands_t *commands, char *msg)
+{
+    char *line = commands->linebuf;
+
+    command_errf(commands, msg, line);
+}
+
+/*
+ * regerr: turn a numeric regexp compile error (as stored by
+ * SEDCOMPILE_ERROR) into a message and report it via the command error
+ * callback.  The offending command line is appended to every known message.
+ *
+ * err == 0 means "no error" and reports nothing.  An unrecognised code is
+ * reported together with its numeric value.
+ *
+ * Always returns 0.
+ */
+static int regerr(sed_commands_t *commands, int err)
+{
+    switch(err) {
+    case 0:
+        /* No error */
+        break;
+    case 11:
+        comperr(commands, "Range endpoint too large: %s");
+        break;
+
+    case 16:
+        comperr(commands, "Bad number: %s");
+        break;
+
+    case 25:
+        comperr(commands, "``\\digit'' out of range: %s");
+        break;
+
+    case 36:
+        comperr(commands, "Illegal or missing delimiter: %s");
+        break;
+
+    case 41:
+        comperr(commands, "No remembered search string: %s");
+        break;
+
+    case 42:
+        comperr(commands, "\\( \\) imbalance: %s");
+        break;
+
+    case 43:
+        comperr(commands, "Too many \\(: %s");
+        break;
+
+    case 44:
+        comperr(commands, "More than 2 numbers given in \\{ \\}: %s");
+        break;
+
+    case 45:
+        comperr(commands, "} expected after \\: %s");
+        break;
+
+    case 46:
+        comperr(commands, "First number exceeds second in \\{ \\}: %s");
+        break;
+
+    case 49:
+        comperr(commands, "[ ] imbalance: %s");
+        break;
+
+    case 50:
+        comperr(commands, SEDERR_TMMES);
+        break;
+
+    default:
+        /*
+         * The old message printed the command line where the text promised
+         * an error code; report the actual code and keep the line as context.
+         */
+        command_errf(commands, "Unknown regexp error code %d: %s",
+                     err, commands->linebuf);
+        break;
+    }
+    return (0);
+}
+
+
+/*
+ * sed_compile: compile the regular expression found at commands->cp into
+ * the byte code interpreted by sed_step()/_advance().
+ *
+ *   commands  compile state; cp is the read position and is advanced past
+ *             the closing delimiter when an expression was read
+ *   compargs  receives circf (leading '^' seen) and nbra (number of \( \))
+ *   ep        where to write the compiled expression
+ *   endbuf    upper bound for ep; exceeding it yields error 50
+ *   seof      delimiter character that terminates the expression
+ *
+ * Returns NULL after reporting the problem through regerr() on error.
+ * Otherwise returns the position just past the compiled code; if the
+ * expression is empty (delimiter or newline comes first) that is ep itself,
+ * unchanged.
+ */
+char *sed_compile(sed_commands_t *commands, sed_comp_args *compargs,
+                  char *ep, char *endbuf, int seof)
+{
+    int c;
+    int eof = seof;
+    char *lastep;
+    int cclcnt;
+    char bracket[NBRA], *bracketp;
+    int closed;
+    int neg;
+    int lc;
+    int i, cflg;
+    int iflag; /* used for non-ascii characters in brackets */
+    char *sp = commands->cp;
+    int regerrno = 0;
+
+    lastep = 0;
+    /* Empty expression: nothing is emitted, a newline is left unread. */
+    if ((c = GETC()) == eof || c == '\n') {
+        if (c == '\n')
+            UNGETC(c);
+        commands->cp = sp;
+        goto out;
+    }
+    bracketp = bracket;
+    compargs->circf = closed = compargs->nbra = 0;
+    if (c == '^')
+        compargs->circf++;
+    else
+        UNGETC(c);
+    while (1) {
+        if (ep >= endbuf)
+            SEDCOMPILE_ERROR(50);
+        c = GETC();
+        /* lastep tracks the item a following '*' or \{ \} applies to. */
+        if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
+            lastep = ep;
+        if (c == eof) {
+            *ep++ = CCEOF;
+            if (bracketp != bracket)
+                SEDCOMPILE_ERROR(42);
+            commands->cp = sp;
+            goto out;
+        }
+        switch (c) {
+
+        case '.':
+            *ep++ = CDOT;
+            continue;
+
+        case '\n':
+            SEDCOMPILE_ERROR(36);
+
+        case '*':
+            /* A '*' with nothing (or a group marker) before it is literal. */
+            if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
+                goto defchar;
+            *lastep |= STAR;
+            continue;
+
+        case '$':
+            /* '$' only anchors when it is the last pattern character. */
+            if (PEEKC() != eof && PEEKC() != '\n')
+                goto defchar;
+            *ep++ = CDOL;
+            continue;
+
+        case '[':
+            if (&ep[17] >= endbuf)
+                SEDCOMPILE_ERROR(50);
+
+            /* Start with a 16-byte bitmap covering byte values 0..127. */
+            *ep++ = CCL;
+            lc = 0;
+            for (i = 0; i < 16; i++)
+                ep[i] = 0;
+
+            neg = 0;
+            if ((c = GETC()) == '^') {
+                neg = 1;
+                c = GETC();
+            }
+            iflag = 1;
+            do {
+                c &= 0377;
+                if (c == '\0' || c == '\n')
+                    SEDCOMPILE_ERROR(49);
+                /* First byte with the high bit set: widen to 32 bytes. */
+                if ((c & 0200) && iflag) {
+                    iflag = 0;
+                    if (&ep[32] >= endbuf)
+                        SEDCOMPILE_ERROR(50);
+                    ep[-1] = CXCL;
+                    for (i = 16; i < 32; i++)
+                        ep[i] = 0;
+                }
+                if (c == '-' && lc != 0) {
+                    /* "x-]" : the '-' is a literal member. */
+                    if ((c = GETC()) == ']') {
+                        PLACE('-');
+                        break;
+                    }
+                    if ((c & 0200) && iflag) {
+                        iflag = 0;
+                        if (&ep[32] >= endbuf)
+                            SEDCOMPILE_ERROR(50);
+                        ep[-1] = CXCL;
+                        for (i = 16; i < 32; i++)
+                            ep[i] = 0;
+                    }
+                    /* Range lc-c: mark everything below c; c itself follows. */
+                    while (lc < c) {
+                        PLACE(lc);
+                        lc++;
+                    }
+                }
+                lc = c;
+                PLACE(c);
+            } while ((c = GETC()) != ']');
+
+            /* From here on iflag holds the bitmap size in bytes. */
+            if (iflag)
+                iflag = 16;
+            else
+                iflag = 32;
+
+            if (neg) {
+                if (iflag == 32) {
+                    /* Invert the wide map in place; NUL must never match. */
+                    for (cclcnt = 0; cclcnt < iflag;
+                        cclcnt++)
+                        ep[cclcnt] ^= 0377;
+                    ep[0] &= 0376;
+                } else {
+                    ep[-1] = NCCL;
+                    /* make nulls match so test fails */
+                    ep[0] |= 01;
+                }
+            }
+
+            ep += iflag;
+
+            continue;
+
+        case '\\':
+            switch (c = GETC()) {
+
+            case '(':
+                if (compargs->nbra >= NBRA)
+                    SEDCOMPILE_ERROR(43);
+                *bracketp++ = compargs->nbra;
+                *ep++ = CBRA;
+                *ep++ = compargs->nbra++;
+                continue;
+
+            case ')':
+                if (bracketp <= bracket)
+                    SEDCOMPILE_ERROR(42);
+                *ep++ = CKET;
+                *ep++ = *--bracketp;
+                closed++;
+                continue;
+
+            case '{':
+                if (lastep == (char *) 0)
+                    goto defchar;
+                *lastep |= RNGE;
+                cflg = 0;
+            nlim:
+                c = GETC();
+                i = 0;
+                do {
+                    if ('0' <= c && c <= '9') {
+                        /*
+                         * Saturate instead of accumulating without bound:
+                         * once i has reached 255 it is rejected below
+                         * anyway, and stopping here keeps a long digit
+                         * string from overflowing the int (and possibly
+                         * wrapping back into the accepted range).
+                         */
+                        if (i < 255)
+                            i = 10 * i + c - '0';
+                    }
+                    else
+                        SEDCOMPILE_ERROR(16);
+                } while (((c = GETC()) != '\\') && (c != ','));
+                if (i >= 255)
+                    SEDCOMPILE_ERROR(11);
+                *ep++ = i;
+                if (c == ',') {
+                    if (cflg++)
+                        SEDCOMPILE_ERROR(44);
+                    /* "\{m,\}" : 255 encodes "no upper bound". */
+                    if ((c = GETC()) == '\\')
+                        *ep++ = (char) 255;
+                    else {
+                        UNGETC(c);
+                        goto nlim;
+                        /* get 2'nd number */
+                    }
+                }
+                if (GETC() != '}')
+                    SEDCOMPILE_ERROR(45);
+                if (!cflg)    /* one number */
+                    *ep++ = i;
+                else if ((ep[-1] & 0377) < (ep[-2] & 0377))
+                    SEDCOMPILE_ERROR(46);
+                continue;
+
+            case '\n':
+                SEDCOMPILE_ERROR(36);
+
+            case 'n':
+                c = '\n';
+                goto defchar;
+
+            default:
+                /* \1..\9 may only refer to a group that is already closed. */
+                if (c >= '1' && c <= '9') {
+                    if ((c -= '1') >= closed)
+                        SEDCOMPILE_ERROR(25);
+                    *ep++ = CBACK;
+                    *ep++ = c;
+                    continue;
+                }
+            }
+    /* Drop through to default to use \ to turn off special chars */
+
+        defchar:
+        default:
+            lastep = ep;
+            *ep++ = CCHR;
+            *ep++ = c;
+        }
+    }
+out:
+    if (regerrno) {
+        regerr(commands, regerrno);
+        return (char*) NULL;
+    }
+    /* XXX : Basant : what extra */
+    /* int reglength = (int)(ep - expbuf); */
+    return ep;
+}
+
+/*
+ * sed_step: search the NUL-terminated string p1 for a match of the compiled
+ * expression p2.
+ *
+ * With circf set the expression is tried at p1 only.  Otherwise every
+ * position up to and including the terminating NUL is tried in turn.  On
+ * success vars->loc1 is the start of the match (the end is recorded by
+ * _advance) and 1 is returned; otherwise 0.
+ */
+int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars)
+{
+    /* Anchored expression: a single attempt at the start of the string. */
+    if (circf) {
+        vars->loc1 = p1;
+        return (_advance(p1, p2, vars));
+    }
+
+    /* Expression starts with a literal: skip positions that cannot match. */
+    if (*p2 == CCHR) {
+        const int first = p2[1];
+
+        for (;;) {
+            if (*p1 == first && _advance(p1, p2, vars)) {
+                vars->loc1 = p1;
+                return (1);
+            }
+            if (*p1++ == '\0')
+                return (0);
+        }
+    }
+
+    /* General case: try each position in order. */
+    for (;;) {
+        if (_advance(p1, p2, vars)) {
+            vars->loc1 = p1;
+            return (1);
+        }
+        if (*p1++ == '\0')
+            return (0);
+    }
+}
+
+/*
+ * _advance: try to match the compiled expression ep against the text at lp.
+ *
+ * Walks the byte code one opcode at a time.  Returns 1 when CCEOF is
+ * reached, after storing the end of the match in vars->loc2; returns 0 as
+ * soon as an item fails.  Group boundaries are recorded in
+ * vars->braslist/braelist as CBRA/CKET are passed.
+ *
+ * Repeated items (STAR / RNGE variants) first consume as much as possible,
+ * then fall into the shared `star' code, which backs off one character at a
+ * time and recursively tries the remainder of the expression.
+ *
+ * The switch has no default case: an opcode value without a case label is
+ * skipped and the loop moves on to the next byte.
+ */
+static int _advance(char *lp, char *ep, step_vars_storage *vars)
+{
+    char *curlp;
+    int c;
+    char *bbeg;
+    char neg;
+    int ct;
+    int epint; /* int value of *ep */
+
+    while (1) {
+        neg = 0;
+        switch (*ep++) {
+
+        case CCHR:
+            if (*ep++ == *lp++)
+                continue;
+            return (0);
+
+        case CDOT:
+            if (*lp++)
+                continue;
+            return (0);
+
+        case CDOL:
+            if (*lp == 0)
+                continue;
+            return (0);
+
+        case CCEOF:
+            vars->loc2 = lp;
+            return (1);
+
+        case CXCL:
+            c = (unsigned char)*lp++;
+            if (ISTHERE(c)) {
+                ep += 32;
+                continue;
+            }
+            return (0);
+
+        case NCCL:
+            neg = 1;
+            /* fall through */
+
+        case CCL:
+            /* 16-byte map: bytes with the high bit set are never members. */
+            c = *lp++;
+            if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
+                ep += 16;
+                continue;
+            }
+            return (0);
+
+        case CBRA:
+            epint = (int) *ep;
+            vars->braslist[epint] = lp;
+            ep++;
+            continue;
+
+        case CKET:
+            epint = (int) *ep;
+            vars->braelist[epint] = lp;
+            ep++;
+            continue;
+
+        /*
+         * RNGE variants: consume the mandatory `low' occurrences, then up to
+         * `size' optional ones.  When the optional loop runs to exhaustion
+         * (size ends below zero) lp is bumped once more so that, like the
+         * early-break case, it sits one past the last accepted character --
+         * the position `star' expects, since it pre-decrements.
+         */
+        case CCHR | RNGE:
+            c = *ep++;
+            getrnge(ep, vars);
+            while (vars->low--)
+                if (*lp++ != c)
+                    return (0);
+            curlp = lp;
+            while (vars->size--)
+                if (*lp++ != c)
+                    break;
+            if (vars->size < 0)
+                lp++;
+            ep += 2;
+            goto star;
+
+        case CDOT | RNGE:
+            getrnge(ep, vars);
+            while (vars->low--)
+                if (*lp++ == '\0')
+                    return (0);
+            curlp = lp;
+            while (vars->size--)
+                if (*lp++ == '\0')
+                    break;
+            if (vars->size < 0)
+                lp++;
+            ep += 2;
+            goto star;
+
+        case CXCL | RNGE:
+            getrnge(ep + 32, vars);
+            while (vars->low--) {
+                c = (unsigned char)*lp++;
+                if (!ISTHERE(c))
+                    return (0);
+            }
+            curlp = lp;
+            while (vars->size--) {
+                c = (unsigned char)*lp++;
+                if (!ISTHERE(c))
+                    break;
+            }
+            if (vars->size < 0)
+                lp++;
+            ep += 34;        /* 32 + 2 */
+            goto star;
+
+        case NCCL | RNGE:
+            neg = 1;
+            /* fall through */
+
+        case CCL | RNGE:
+            getrnge(ep + 16, vars);
+            while (vars->low--) {
+                c = *lp++;
+                if (((c & 0200) || !ISTHERE(c)) ^ neg)
+                    return (0);
+            }
+            curlp = lp;
+            while (vars->size--) {
+                c = *lp++;
+                if (((c & 0200) || !ISTHERE(c)) ^ neg)
+                    break;
+            }
+            if (vars->size < 0)
+                lp++;
+            ep += 18;         /* 16 + 2 */
+            goto star;
+
+        case CBACK:
+            epint = (int) *ep;
+            bbeg = vars->braslist[epint];
+            ct = vars->braelist[epint] - bbeg;
+            ep++;
+
+            if (ecmp(bbeg, lp, ct)) {
+                lp += ct;
+                continue;
+            }
+            return (0);
+
+        case CBACK | STAR:
+            epint = (int) *ep;
+            bbeg = vars->braslist[epint];
+            ct = vars->braelist[epint] - bbeg;
+            ep++;
+            /*
+             * An empty group repeated any number of times still matches only
+             * the empty string.  Without this guard both loops below would
+             * spin forever: a zero-length compare always succeeds and lp
+             * never moves.  Carrying on with the next opcode at the same lp
+             * is exactly the single attempt the loops would have made.
+             */
+            if (ct == 0)
+                continue;
+            curlp = lp;
+            while (ecmp(bbeg, lp, ct))
+                lp += ct;
+
+            /* Give back one repetition at a time until the rest matches. */
+            while (lp >= curlp) {
+                if (_advance(lp, ep, vars))
+                    return (1);
+                lp -= ct;
+            }
+            return (0);
+
+
+        case CDOT | STAR:
+            curlp = lp;
+            while (*lp++);
+            goto star;
+
+        case CCHR | STAR:
+            curlp = lp;
+            while (*lp++ == *ep);
+            ep++;
+            goto star;
+
+        case CXCL | STAR:
+            curlp = lp;
+            do {
+                c = (unsigned char)*lp++;
+            } while (ISTHERE(c));
+            ep += 32;
+            goto star;
+
+        case NCCL | STAR:
+            neg = 1;
+            /* fall through */
+
+        case CCL | STAR:
+            curlp = lp;
+            do {
+                c = *lp++;
+            } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
+            ep += 16;
+            goto star;
+
+        /*
+         * Shared backtracking: lp is one past the longest run consumed and
+         * curlp is where the run started.  Retreat one character per
+         * iteration, retrying the remainder; give up on reaching vars->locs
+         * or after the attempt at curlp.
+         */
+        star:
+            do {
+                if (--lp == vars->locs)
+                    break;
+                if (_advance(lp, ep, vars))
+                    return (1);
+            } while (lp > curlp);
+            return (0);
+
+        }
+    }
+}
+
+/*
+ * getrnge: decode the two count bytes of a \{m,n\} item found at str.
+ * vars->low receives the first byte (minimum).  vars->size receives the
+ * number of additional repetitions allowed: second byte minus first, or
+ * 20000 when the second byte is 255.
+ */
+static void getrnge(char *str, step_vars_storage *vars)
+{
+    int lo = str[0] & 0377;
+    int hi = str[1] & 0377;
+
+    vars->low = lo;
+    if (hi == 255)
+        vars->size = 20000;
+    else
+        vars->size = hi - lo;
+}
+
+

Added: httpd/httpd/trunk/modules/filters/regexp.h
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/regexp.h?rev=691418&view=auto
==============================================================================
--- httpd/httpd/trunk/modules/filters/regexp.h (added)
+++ httpd/httpd/trunk/modules/filters/regexp.h Tue Sep  2 16:01:47 2008
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
+ * Use is subject to license terms.
+ *
+ *	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	
+ *	  All Rights Reserved  	
+ *
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *  http://www.apache.org/licenses/LICENSE-2.0. 
+ * 
+ * Unless required by applicable law or agreed to in writing, software 
+ * distributed under the License is distributed on an "AS IS" BASIS, 
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 
+ * or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+
+#ifndef _REGEXP_H
+#define _REGEXP_H
+
+#include "libsed.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Opcodes of the compiled regular expression, as emitted by sed_compile()
+ * and interpreted by _advance().
+ */
+#define    CBRA    2
+#define    CCHR    4
+#define    CDOT    8
+#define    CCL    12
+#define    CXCL    16
+#define    CDOL    20
+#define    CCEOF    22
+#define    CKET    24
+#define    CBACK    36
+#define    NCCL    40
+
+/* Repetition flags OR'ed into the opcode of the item they apply to. */
+#define    STAR    01
+#define    RNGE    03
+
+/* Maximum number of \( \) groups in one expression. */
+#define    NBRA    9
+
+/*
+ * Bitmap helpers for bracket expressions: bit (c & 7) of byte (c >> 3).
+ * Both rely on `ep' (start of the class bitmap) and `bittab' being visible
+ * where they are used.  The argument is evaluated twice, so it must be free
+ * of side effects; it and the whole expansion are parenthesized so the
+ * macros stay correct inside larger expressions.
+ */
+#define    PLACE(c)    (ep[(c) >> 3] |= bittab[(c) & 07])
+#define    ISTHERE(c)    (ep[(c) >> 3] & bittab[(c) & 07])
+
+/*
+ * Per-match state shared by sed_step(), _advance() and getrnge().
+ */
+typedef struct _step_vars_storage {
+    /*
+     * loc1: start of the match (set by sed_step); loc2: end of the match
+     * (set when CCEOF is reached); locs: position at which '*' backtracking
+     * stops early.
+     */
+    char    *loc1, *loc2, *locs;
+    char    *braslist[NBRA];    /* start of each \( \) group */
+    char    *braelist[NBRA];    /* end of each \( \) group */
+    int    low;     /* \{m,n\}: mandatory repetitions (set by getrnge) */
+    int    size;    /* \{m,n\}: further optional repetitions */
+} step_vars_storage;
+
+/* Results handed back by sed_compile() besides the compiled code. */
+typedef struct _sed_comp_args {
+    int circf; /* Regular expression starts with ^ */
+    int nbra; /* braces count */
+} sed_comp_args;
+
+extern char *sed_compile(sed_commands_t *commands, sed_comp_args *compargs,
+                         char *ep, char *endbuf, int seof);
+extern void command_errf(sed_commands_t *commands, const char *fmt, ...);
+
+/*
+ * Diagnostic texts.  Each one is a printf-style format; entries containing
+ * a conversion need a matching argument from the caller (the visible
+ * callers pass the offending command line for %s).
+ */
+#define SEDERR_CGMES "command garbled: %s"
+#define SEDERR_SMMES "Space missing before filename: %s"
+#define SEDERR_TMMES "too much command text: %s"
+#define SEDERR_LTLMES "label too long: %s"
+#define SEDERR_ULMES "undefined label: %s"
+#define SEDERR_DLMES "duplicate labels: %s"
+#define SEDERR_TMLMES "too many labels: %s"
+#define SEDERR_AD0MES "no addresses allowed: %s"
+#define SEDERR_AD1MES "only one address allowed: %s"
+#define SEDERR_TOOBIG "suffix too large: %s"
+#define SEDERR_OOMMES "out of memory"
+#define SEDERR_COPFMES "cannot open pattern file: %s"
+#define SEDERR_COIFMES "cannot open input file: %s"
+#define SEDERR_TMOMES "too many {'s"
+#define SEDERR_TMCMES "too many }'s"
+#define SEDERR_NRMES "first RE may not be null"
+#define SEDERR_UCMES "unrecognized command: %s"
+#define SEDERR_TMWFMES "too many files in w commands"
+#define SEDERR_COMES "cannot open %s"
+#define SEDERR_CCMES "cannot create %s"
+#define SEDERR_TMLNMES "too many line numbers"
+#define SEDERR_TMAMES "too many appends after line %lld"
+#define SEDERR_TMRMES "too many reads after line %lld"
+#define SEDERR_DOORNG "``\\digit'' out of range: %s"
+#define SEDERR_EDMOSUB "ending delimiter missing on substitution: %s"
+#define SEDERR_EDMOSTR "ending delimiter missing on string: %s"
+#define SEDERR_FNTL "file name too long: %s"
+#define SEDERR_CLTL "command line too long"
+#define SEDERR_TSNTSS "transform strings not the same size: %s"
+#define SEDERR_OLTL "output line too long."
+#define SEDERR_HSOVERFLOW "hold space overflowed."
+#define SEDERR_INTERNAL "internal sed error"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _REGEXP_H */

Added: httpd/httpd/trunk/modules/filters/sed.h
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/sed.h?rev=691418&view=auto
==============================================================================
--- httpd/httpd/trunk/modules/filters/sed.h (added)
+++ httpd/httpd/trunk/modules/filters/sed.h Tue Sep  2 16:01:47 2008
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
+ * Use is subject to license terms.
+ *
+ *	Copyright (c) 1984 AT&T
+ *	  All Rights Reserved  	
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *  http://www.apache.org/licenses/LICENSE-2.0. 
+ * 
+ * Unless required by applicable law or agreed to in writing, software 
+ * distributed under the License is distributed on an "AS IS" BASIS, 
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 
+ * or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+
+#ifndef _SED_H
+#define _SED_H
+
+#include <stdlib.h>
+#include <limits.h>
+
+/* NOTE(review): presumably address-type codes used by address(); confirm. */
+#define CEND    16
+#define CLNUM   14
+
+/* RESIZE: bytes in the compiled-command buffer (commands->respace). */
+#define RESIZE  10000
+/* LBSIZE: capacity of the command line buffer (allocated as LBSIZE + 1). */
+#define LBSIZE  1000
+
+/*
+ * Command codes stored in a command record's `command' member (octal).
+ * The mnemonic generally follows the sed command letter, e.g. ACOM for
+ * 'a', CCOM for 'c', ICOM for 'i', EQCOM for '=', BCOM for '{'.
+ */
+#define ACOM    01
+#define BCOM    020
+#define CCOM    02
+#define CDCOM   025
+#define CNCOM   022
+#define COCOM   017
+#define CPCOM   023
+#define DCOM    03
+#define ECOM    015
+#define EQCOM   013
+#define FCOM    016
+#define GCOM    027
+#define CGCOM   030
+#define HCOM    031
+#define CHCOM   032
+#define ICOM    04
+#define LCOM    05
+#define NCOM    012
+#define PCOM    010
+#define QCOM    011
+#define RCOM    06
+#define SCOM    07
+#define TCOM    021
+#define WCOM    014
+#define CWCOM   024
+#define YCOM    026
+#define XCOM    033
+
+#endif /* _SED_H */

Added: httpd/httpd/trunk/modules/filters/sed0.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/sed0.c?rev=691418&view=auto
==============================================================================
--- httpd/httpd/trunk/modules/filters/sed0.c (added)
+++ httpd/httpd/trunk/modules/filters/sed0.c Tue Sep  2 16:01:47 2008
@@ -0,0 +1,1026 @@
+/*
+ * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
+ * Use is subject to license terms.
+ *
+ *	Copyright (c) 1984 AT&T
+ *	  All Rights Reserved  	
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *  http://www.apache.org/licenses/LICENSE-2.0. 
+ * 
+ * Unless required by applicable law or agreed to in writing, software 
+ * distributed under the License is distributed on an "AS IS" BASIS, 
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 
+ * or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+
+#include "apr.h"
+#include "apr_strings.h"
+#include "libsed.h"
+#include "sed.h"
+#include "regexp.h"
+
+#define CCEOF 22
+
+static int fcomp(sed_commands_t *commands, apr_file_t *fin);
+static char *compsub(sed_commands_t *commands,
+                     sed_comp_args *compargs, char *rhsbuf);
+static int rline(sed_commands_t *commands, apr_file_t *fin,
+                 char *lbuf, char *lbend);
+static char *address(sed_commands_t *commands, char *expbuf,
+                     apr_status_t* status);
+static char *text(sed_commands_t *commands, char *textbuf, char *endbuf);
+static sed_label_t *search(sed_commands_t *commands);
+static char *ycomp(sed_commands_t *commands, char *expbuf);
+static char *comple(sed_commands_t *commands, sed_comp_args *compargs,
+                    char *x1, char *ep, char *x3, char x4);
+static sed_reptr_t *alloc_reptr(sed_commands_t *commands);
+static int check_finalized(const sed_commands_t *commands);
+
+/*
+ * command_errf: format a diagnostic printf-style and hand it to the error
+ * callback registered in commands.  The message is allocated from
+ * commands->pool.  Does nothing unless both a callback and a pool are set.
+ */
+void command_errf(sed_commands_t *commands, const char *fmt, ...)
+{
+    va_list ap;
+    const char *msg;
+
+    if (!commands->errfn || !commands->pool)
+        return;
+
+    va_start(ap, fmt);
+    msg = apr_pvsprintf(commands->pool, fmt, ap);
+    commands->errfn(commands->data, msg);
+    va_end(ap);
+}
+
+/*
+ * sed_init_commands: reset *commands and prepare it for compilation.
+ *
+ * Stores the error callback, its user data and the pool, sets up the label
+ * table, allocates the RESIZE-byte command buffer and the first command
+ * record, and marks the (still empty) script as finalizable.
+ *
+ * Returns APR_SUCCESS, or APR_EGENERAL if either allocation fails (an
+ * out-of-memory message is reported for the buffer).
+ */
+apr_status_t sed_init_commands(sed_commands_t *commands, sed_err_fn_t *errfn, void *data,
+                               apr_pool_t *p)
+{
+    memset(commands, 0, sizeof(*commands));
+
+    /* Error reporting needs callback, data and pool in place first. */
+    commands->pool = p;
+    commands->errfn = errfn;
+    commands->data = data;
+
+    /* Slot 0 of the label table is skipped; labels start at index 1. */
+    commands->labtab = commands->ltab;
+    commands->lab = commands->labtab + 1;
+
+    commands->respace = apr_pcalloc(p, RESIZE);
+    if (!commands->respace) {
+        command_errf(commands, SEDERR_OOMMES);
+        return APR_EGENERAL;
+    }
+
+    commands->rep = alloc_reptr(commands);
+    if (!commands->rep) {
+        return APR_EGENERAL;
+    }
+
+    commands->rep->ad1 = commands->respace;
+    commands->reend = commands->respace + (RESIZE - 1);
+    commands->labend = commands->labtab + SED_LABSIZE;
+    commands->canbefinal = 1;
+
+    return APR_SUCCESS;
+}
+ 
+/*
+ * sed_destroy_commands
+ *
+ * Currently a no-op: the body is empty and nothing is released here.
+ */
+void sed_destroy_commands(sed_commands_t *commands)
+{
+}
+
+/*
+ * sed_compile_string: compile the sed script text s into commands.
+ *
+ * The string is published through commands->earg with eflag raised for the
+ * duration of the compile.  After a successful compile canbefinal is
+ * refreshed from check_finalized().
+ *
+ * Returns APR_SUCCESS, or APR_EGENERAL if compilation failed.
+ */
+apr_status_t sed_compile_string(sed_commands_t *commands, const char *s)
+{
+    apr_status_t rv;
+
+    commands->earg = s;
+    commands->eflag = 1;
+    rv = fcomp(commands, NULL);
+    commands->eflag = 0;
+
+    if (rv != APR_SUCCESS) {
+        return APR_EGENERAL;
+    }
+
+    commands->canbefinal = check_finalized(commands);
+    return APR_SUCCESS;
+}
+
+/*
+ * sed_compile_file: compile the sed script read from fin into commands.
+ *
+ * As in sed_compile_string(), a successful compile refreshes
+ * commands->canbefinal, so sed_canbe_finalized() reflects open blocks or
+ * unresolved labels introduced by the file rather than a stale value.
+ *
+ * Returns APR_SUCCESS, or APR_EGENERAL if compilation failed.
+ */
+apr_status_t sed_compile_file(sed_commands_t *commands, apr_file_t *fin)
+{
+    apr_status_t rv = fcomp(commands, fin);
+
+    if (rv == APR_SUCCESS)
+        commands->canbefinal = check_finalized(commands);
+
+    return (rv != 0 ? APR_EGENERAL : APR_SUCCESS);
+}
+
+/*
+ * sed_get_finalize_error: explain why the compiled script cannot be
+ * finalized.
+ *
+ * Returns a message for an unclosed '{' block, for the first label without
+ * an address (formatted into pool), or for a label that still has a pending
+ * branch chain.  Returns NULL when none of these apply.
+ */
+char* sed_get_finalize_error(const sed_commands_t *commands, apr_pool_t* pool)
+{
+    const sed_label_t *lab = commands->labtab + 1;
+
+    if (commands->depth != 0)
+        return SEDERR_TMOMES;
+
+    /* An empty branch chain is not an issue */
+    while (lab < commands->lab) {
+        if (lab->address == 0)
+            return apr_psprintf(pool, SEDERR_ULMES, lab->asc);
+
+        if (lab->chain)
+            return SEDERR_INTERNAL;
+
+        lab++;
+    }
+    return NULL;
+}
+
+/*
+ * sed_canbe_finalized: report the cached "script is complete" flag
+ * (commands->canbefinal).
+ */
+int sed_canbe_finalized(const sed_commands_t *commands)
+{
+    const int ready = commands->canbefinal;
+
+    return ready;
+}
+
+/*
+ * check_finalized: return 1 when the script is complete -- no open '{'
+ * block and every label has an address and no pending branch chain --
+ * otherwise 0.
+ */
+static int check_finalized(const sed_commands_t *commands)
+{
+    const sed_label_t *lab = commands->labtab + 1;
+
+    if (commands->depth != 0)
+        return 0;
+
+    /* An empty branch chain is not an issue */
+    while (lab < commands->lab) {
+        if (lab->address == 0)
+            return 0;
+        if (lab->chain)
+            return 0;
+        lab++;
+    }
+    return 1;
+}
+
+/*
+ * dechain: resolve the pending branches of a label.
+ *
+ * lpt->chain heads a list of command records linked through lb1.  Every
+ * record on the list gets its lb1 pointed at address, and the chain is then
+ * cleared.  Does nothing if lpt, its chain or address is NULL.
+ */
+static void dechain(sed_label_t *lpt, sed_reptr_t *address)
+{
+    sed_reptr_t *cur;
+    sed_reptr_t *next;
+
+    if (!lpt || !lpt->chain || !address)
+        return;
+
+    for (cur = lpt->chain; cur != NULL; cur = next) {
+        /* Read the link before overwriting it with the real target. */
+        next = cur->lb1;
+        cur->lb1 = address;
+    }
+    lpt->chain = NULL;
+}
+
+/*
+ * fcomp
+ */
+static int fcomp(sed_commands_t *commands, apr_file_t *fin)
+{
+    char *p, *op, *tp;
+    sed_reptr_t *pt, *pt1;
+    int i, ii;
+    sed_label_t *lpt;
+    char fnamebuf[APR_PATH_MAX];
+    apr_status_t status;
+    sed_comp_args compargs;
+
+    op = commands->lastre;
+    if (!commands->linebuf) {
+        commands->linebuf = apr_pcalloc(commands->pool, LBSIZE + 1);
+    }
+
+    if (rline(commands, fin, commands->linebuf,
+              (commands->linebuf + LBSIZE + 1)) < 0)
+        return 0;
+    if (*commands->linebuf == '#') {
+        if (commands->linebuf[1] == 'n')
+            commands->nflag = 1;
+    }
+    else {
+        commands->cp = commands->linebuf;
+        goto comploop;
+    }
+
+    for (;;) {
+        if (rline(commands, fin, commands->linebuf,
+                  (commands->linebuf + LBSIZE + 1)) < 0)
+            break;
+
+        commands->cp = commands->linebuf;
+
+comploop:
+        while (*commands->cp == ' ' || *commands->cp == '\t')
+            commands->cp++;
+        if (*commands->cp == '\0' || *commands->cp == '#')
+            continue;
+        if (*commands->cp == ';') {
+            commands->cp++;
+            goto comploop;
+        }
+
+        p = address(commands, commands->rep->ad1, &status);
+        if (status != APR_SUCCESS) {
+            command_errf(commands, SEDERR_CGMES, commands->linebuf);
+            return -1;
+        }
+
+        if (p == commands->rep->ad1) {
+            if (op)
+                commands->rep->ad1 = op;
+            else {
+                command_errf(commands, SEDERR_NRMES);
+                return -1;
+            }
+        } else if (p == 0) {
+            p = commands->rep->ad1;
+            commands->rep->ad1 = 0;
+        } else {
+            op = commands->rep->ad1;
+            if (*commands->cp == ',' || *commands->cp == ';') {
+                commands->cp++;
+                commands->rep->ad2 = p;
+                p = address(commands, commands->rep->ad2, &status);
+                if ((status != APR_SUCCESS) || (p == 0)) {
+                    command_errf(commands, SEDERR_CGMES, commands->linebuf);
+                    return -1;
+                }
+                if (p == commands->rep->ad2)
+                    commands->rep->ad2 = op;
+                else
+                    op = commands->rep->ad2;
+            } else
+                commands->rep->ad2 = 0;
+        }
+
+        if(p > &commands->respace[RESIZE-1]) {
+            command_errf(commands, SEDERR_TMMES);
+            return -1;
+        }
+
+        while (*commands->cp == ' ' || *commands->cp == '\t')
+            commands->cp++;
+
+swit:
+        switch(*commands->cp++) {
+        default:
+            command_errf(commands, SEDERR_UCMES, commands->linebuf);
+            return -1;
+
+        case '!':
+            commands->rep->negfl = 1;
+            goto swit;
+
+        case '{':
+            commands->rep->command = BCOM;
+            commands->rep->negfl = !(commands->rep->negfl);
+            commands->cmpend[commands->depth++] = &commands->rep->lb1;
+            commands->rep = alloc_reptr(commands);
+            commands->rep->ad1 = p;
+            if (*commands->cp == '\0')
+                continue;
+            goto comploop;
+
+        case '}':
+            if (commands->rep->ad1) {
+                command_errf(commands, SEDERR_AD0MES, commands->linebuf);
+                return -1;
+            }
+
+            if (--commands->depth < 0) {
+                command_errf(commands, SEDERR_TMCMES);
+                return -1;
+            }
+            *commands->cmpend[commands->depth] = commands->rep;
+
+            commands->rep->ad1 = p;
+            continue;
+
+        case '=':
+            commands->rep->command = EQCOM;
+            if (commands->rep->ad2) {
+                command_errf(commands, SEDERR_AD1MES, commands->linebuf);
+                return -1;
+            }
+            break;
+
+        case ':':
+            if (commands->rep->ad1) {
+                command_errf(commands, SEDERR_AD0MES, commands->linebuf);
+                return -1;
+            }
+
+            while (*commands->cp++ == ' ');
+            commands->cp--;
+
+            tp = commands->lab->asc;
+            while ((*tp++ = *commands->cp++)) {
+                if (tp >= &(commands->lab->asc[8])) {
+                    command_errf(commands, SEDERR_LTLMES, commands->linebuf);
+                    return -1;
+                }
+            }
+            *--tp = '\0';
+
+            if ((lpt = search(commands)) != NULL) {
+                if (lpt->address) {
+                    command_errf(commands, SEDERR_DLMES, commands->linebuf);
+                    return -1;
+                }
+                dechain(lpt, commands->rep);
+            } else {
+                commands->lab->chain = 0;
+                lpt = commands->lab;
+                if (++commands->lab >= commands->labend) {
+                    command_errf(commands, SEDERR_TMLMES, commands->linebuf);
+                    return -1;
+                }
+            }
+            lpt->address = commands->rep;
+            commands->rep->ad1 = p;
+
+            continue;
+
+        case 'a':
+            commands->rep->command = ACOM;
+            if (commands->rep->ad2) {
+                command_errf(commands, SEDERR_AD1MES, commands->linebuf);
+                return -1;
+            }
+            if (*commands->cp == '\\')
+                commands->cp++;
+            if (*commands->cp++ != '\n') {
+                command_errf(commands, SEDERR_CGMES, commands->linebuf);
+                return -1;
+            }
+            commands->rep->re1 = p;
+            p = text(commands, commands->rep->re1, commands->reend);
+            if (p == NULL)
+                return -1;
+            break;
+
+        case 'c':
+            commands->rep->command = CCOM;
+            if (*commands->cp == '\\') commands->cp++;
+            if (*commands->cp++ != ('\n')) {
+                command_errf(commands, SEDERR_CGMES, commands->linebuf);
+                return -1;
+            }
+            commands->rep->re1 = p;
+            p = text(commands, commands->rep->re1, commands->reend);
+            if (p == NULL)
+                return -1;
+            break;
+
+        case 'i':
+            commands->rep->command = ICOM;
+            if (commands->rep->ad2) {
+                command_errf(commands, SEDERR_AD1MES, commands->linebuf);
+                return -1;
+            }
+            if (*commands->cp == '\\') commands->cp++;
+            if (*commands->cp++ != ('\n')) {
+                command_errf(commands, SEDERR_CGMES, commands->linebuf);
+                return -1;
+            }
+            commands->rep->re1 = p;
+            p = text(commands, commands->rep->re1, commands->reend);
+            if (p == NULL)
+                return -1;
+            break;
+
+        case 'g':
+            commands->rep->command = GCOM;
+            break;
+
+        case 'G':
+            commands->rep->command = CGCOM;
+            break;
+
+        case 'h':
+            commands->rep->command = HCOM;
+            break;
+
+        case 'H':
+            commands->rep->command = CHCOM;
+            break;
+
+        case 't':
+            commands->rep->command = TCOM;
+            goto jtcommon;
+
+        case 'b':
+            commands->rep->command = BCOM;
+jtcommon:
+            while (*commands->cp++ == ' ');
+            commands->cp--;
+
+            if (*commands->cp == '\0') {
+                if ((pt = commands->labtab->chain) != NULL) {
+                    while ((pt1 = pt->lb1) != NULL)
+                        pt = pt1;
+                    pt->lb1 = commands->rep;
+                } else
+                    commands->labtab->chain = commands->rep;
+                break;
+            }
+            tp = commands->lab->asc;
+            while ((*tp++ = *commands->cp++))
+                if (tp >= &(commands->lab->asc[8])) {
+                    command_errf(commands, SEDERR_LTLMES, commands->linebuf);
+                    return -1;
+                }
+            commands->cp--;
+            *--tp = '\0';
+
+            if ((lpt = search(commands)) != NULL) {
+                if (lpt->address) {
+                    commands->rep->lb1 = lpt->address;
+                } else {
+                    pt = lpt->chain;
+                    while ((pt1 = pt->lb1) != NULL)
+                        pt = pt1;
+                    pt->lb1 = commands->rep;
+                }
+            } else {
+                commands->lab->chain = commands->rep;
+                commands->lab->address = 0;
+                if (++commands->lab >= commands->labend) {
+                    command_errf(commands, SEDERR_TMLMES, commands->linebuf);
+                    return -1;
+                }
+            }
+            break;
+
+        case 'n':
+            commands->rep->command = NCOM;
+            break;
+
+        case 'N':
+            commands->rep->command = CNCOM;
+            break;
+
+        case 'p':
+            commands->rep->command = PCOM;
+            break;
+
+        case 'P':
+            commands->rep->command = CPCOM;
+            break;
+
+        case 'r':
+            commands->rep->command = RCOM;
+            if (commands->rep->ad2) {
+                command_errf(commands, SEDERR_AD1MES, commands->linebuf);
+                return -1;
+            }
+            if (*commands->cp++ != ' ') {
+                command_errf(commands, SEDERR_CGMES, commands->linebuf);
+                return -1;
+            }
+            commands->rep->re1 = p;
+            p = text(commands, commands->rep->re1, commands->reend);
+            if (p == NULL)
+                return -1;
+            break;
+
+        case 'd':
+            commands->rep->command = DCOM;
+            break;
+
+        case 'D':
+            commands->rep->command = CDCOM;
+            commands->rep->lb1 = commands->ptrspace;
+            break;
+
+        case 'q':
+            commands->rep->command = QCOM;
+            if (commands->rep->ad2) {
+                command_errf(commands, SEDERR_AD1MES, commands->linebuf);
+                return -1;
+            }
+            break;
+
+        case 'l':
+            commands->rep->command = LCOM;
+            break;
+
+        case 's':
+            commands->rep->command = SCOM;
+            commands->sseof = *commands->cp++;
+            commands->rep->re1 = p;
+            p = comple(commands, &compargs, (char *) 0, commands->rep->re1,
+                       commands->reend, commands->sseof);
+            if (p == NULL)
+                return -1;
+            if (p == commands->rep->re1) {
+                if (op)
+                    commands->rep->re1 = op;
+                else {
+                    command_errf(commands, SEDERR_NRMES);
+                    return -1;
+                }
+            } else 
+                op = commands->rep->re1;
+            commands->rep->rhs = p;
+
+            p = compsub(commands, &compargs, commands->rep->rhs);
+            if ((p) == NULL)
+                return -1;
+
+            if (*commands->cp == 'g') {
+                commands->cp++;
+                commands->rep->gfl = 999;
+            } else if (commands->gflag)
+                commands->rep->gfl = 999;
+
+            if (*commands->cp >= '1' && *commands->cp <= '9') {
+                i = *commands->cp - '0';
+                commands->cp++;
+                while (1) {
+                    ii = *commands->cp;
+                    if (ii < '0' || ii > '9')
+                        break;
+                    i = i*10 + ii - '0';
+                    if (i > 512) {
+                        command_errf(commands, SEDERR_TOOBIG, commands->linebuf);
+                        return -1;
+                    }
+                    commands->cp++;
+                }
+                commands->rep->gfl = i;
+            }
+
+            if (*commands->cp == 'p') {
+                commands->cp++;
+                commands->rep->pfl = 1;
+            }
+
+            if (*commands->cp == 'P') {
+                commands->cp++;
+                commands->rep->pfl = 2;
+            }
+
+            if (*commands->cp == 'w') {
+                commands->cp++;
+                if (*commands->cp++ !=  ' ') {
+                    command_errf(commands, SEDERR_SMMES, commands->linebuf);
+                    return -1;
+                }
+                if (text(commands, fnamebuf, &fnamebuf[APR_PATH_MAX]) == NULL) {
+                    command_errf(commands, SEDERR_FNTL, commands->linebuf);
+                    return -1;
+                }
+                for (i = commands->nfiles - 1; i >= 0; i--)
+                    if (strcmp(fnamebuf,commands->fname[i]) == 0) {
+                        commands->rep->findex = i;
+                        goto done;
+                    }
+                if (commands->nfiles >= NWFILES) {
+                    command_errf(commands, SEDERR_TMWFMES);
+                    return -1;
+                }
+                commands->fname[commands->nfiles] =
+                            apr_pstrdup(commands->pool, fnamebuf);
+                if (commands->fname[commands->nfiles] == NULL) {
+                    command_errf(commands, SEDERR_OOMMES);
+                    return -1;
+                }
+                commands->rep->findex = commands->nfiles++;
+            }
+            break;
+
+        case 'w':
+            commands->rep->command = WCOM;
+            if (*commands->cp++ != ' ') {
+                command_errf(commands, SEDERR_SMMES, commands->linebuf);
+                return -1;
+            }
+            if (text(commands, fnamebuf, &fnamebuf[APR_PATH_MAX]) == NULL) {
+                command_errf(commands, SEDERR_FNTL, commands->linebuf);
+                return -1;
+            }
+            for (i = commands->nfiles - 1; i >= 0; i--)
+                if (strcmp(fnamebuf, commands->fname[i]) == 0) {
+                    commands->rep->findex = i;
+                    goto done;
+                }
+            if (commands->nfiles >= NWFILES) {
+                command_errf(commands, SEDERR_TMWFMES);
+                return -1;
+            }
+            if ((commands->fname[commands->nfiles] =
+                        apr_pstrdup(commands->pool, fnamebuf)) == NULL) {
+                command_errf(commands, SEDERR_OOMMES);
+                return -1;
+            }
+
+            commands->rep->findex = commands->nfiles++;
+            break;
+
+        case 'x':
+            commands->rep->command = XCOM;
+            break;
+
+        case 'y':
+            commands->rep->command = YCOM;
+            commands->sseof = *commands->cp++;
+            commands->rep->re1 = p;
+            p = ycomp(commands, commands->rep->re1);
+            if (p == NULL)
+                return -1;
+            break;
+        }
+done:
+        commands->rep = alloc_reptr(commands);
+
+        commands->rep->ad1 = p;
+
+        if (*commands->cp++ != '\0') {
+            if (commands->cp[-1] == ';')
+                goto comploop;
+            command_errf(commands, SEDERR_CGMES, commands->linebuf);
+            return -1;
+        }
+    }
+    commands->rep->command = 0;
+    commands->lastre = op;
+
+    return 0;
+}
+
+/*
+ * compsub
+ * Copy the replacement (right-hand side) text of an 's' command from
+ * commands->cp into rhsbuf, up to the delimiter held in commands->sseof.
+ * A backslash and the character following it are both copied verbatim; a
+ * back-reference digit larger than compargs->nbra is rejected.
+ *
+ * On success the delimiter is replaced by a NUL in the output, commands->cp
+ * is left just past the delimiter and the address following the stored NUL
+ * is returned.  On error (respace exhausted, back-reference out of range,
+ * or missing closing delimiter) the error is reported through command_errf
+ * and NULL is returned.
+ */
+static char *compsub(sed_commands_t *commands,
+                     sed_comp_args *compargs, char *rhsbuf)
+{
+    char *const limit = &commands->respace[RESIZE-1];
+    char *dst = rhsbuf;
+    char *src = commands->cp;
+    char ch;
+
+    while (dst <= limit) {
+        ch = *src++;
+        *dst = ch;
+
+        if (ch == '\\') {
+            /* escape: keep the backslash and copy the next char as-is */
+            if (++dst > limit)
+                break;
+            ch = *src++;
+            *dst++ = ch;
+            if (ch > compargs->nbra + '0' && ch <= '9') {
+                command_errf(commands, SEDERR_DOORNG, commands->linebuf);
+                return NULL;
+            }
+            continue;
+        }
+
+        if (ch == commands->sseof) {
+            /* closing delimiter: terminate the replacement text */
+            *dst = '\0';
+            commands->cp = src;
+            return dst + 1;
+        }
+
+        if (ch == '\0') {
+            /* ran off the end of the command without a delimiter */
+            command_errf(commands, SEDERR_EDMOSUB, commands->linebuf);
+            return NULL;
+        }
+        dst++;
+    }
+
+    /* no room left in respace */
+    command_errf(commands, SEDERR_TMMES, commands->linebuf);
+    return NULL;
+}
+
+/*
+ * rline
+ * Fetch the next line of sed commands into lbuf (capacity ends at lbend).
+ *
+ * When commands->eflag is non-zero the commands come from the in-memory
+ * string commands->earg: the first call (eflag > 0) starts at earg and sets
+ * eflag to -1; later calls resume from commands->saveq, which is set to the
+ * character after the last consumed newline or to NULL once the string is
+ * exhausted.  Otherwise the line is read from the file fin.
+ *
+ * A backslash is copied together with the character that follows it, so an
+ * escaped newline does not end the line.  Characters that do not fit are
+ * dropped and the overflow is reported when the line ends.
+ *
+ * Returns 1 when a NUL-terminated line has been stored in lbuf, and -1 at
+ * end of input, when a backslash is the last character of the input, or
+ * when the line is too long (SEDERR_CLTL is reported in that case).
+ */
+static int rline(sed_commands_t *commands, apr_file_t *fin,
+                 char *lbuf, char *lbend)
+{
+    char   *p;
+    const char *q;
+    int    t;
+    char   c;
+    apr_size_t bytes_read;
+
+    p = lbuf;
+
+    if(commands->eflag) {
+        if(commands->eflag > 0) {
+            /* first line of the command string */
+            commands->eflag = -1;
+            q = commands->earg;
+            while((t = *q++) != '\0') {
+                if(t == '\n') {
+                    commands->saveq = q;
+                    goto out1;
+                }
+                if (p < lbend)
+                    *p++ = t;
+                if(t == '\\') {
+                    if((t = *q++) == '\0') {
+                        commands->saveq = NULL;
+                        return(-1);
+                    }
+                    if (p < lbend)
+                        *p++ = t;
+                }
+            }
+            commands->saveq = NULL;
+
+        out1:
+            if (p == lbend) {
+                command_errf(commands, SEDERR_CLTL, commands->linebuf);
+                return -1;
+            }
+            *p = '\0';
+            return(1);
+        }
+        /* subsequent lines: continue where the previous call stopped */
+        if((q = commands->saveq) == 0)    return(-1);
+
+        while((t = *q++) != '\0') {
+            if(t == '\n') {
+                commands->saveq = q;
+                goto out2;
+            }
+            if(p < lbend)
+                *p++ = t;
+            if(t == '\\') {
+                if((t = *q++) == '\0') {
+                    commands->saveq = NULL;
+                    return(-1);
+                }
+                if (p < lbend)
+                    *p++ = t;
+            }
+        }
+        commands->saveq = NULL;
+
+    out2:
+        if (p == lbend) {
+            command_errf(commands, SEDERR_CLTL, commands->linebuf);
+            return -1;
+        }
+        *p = '\0';
+        return(1);
+    }
+
+    bytes_read = 1;
+    /* XXX extremely inefficient 1 byte reads */
+    /*
+     * Read into a real char (not the int t) so exactly one byte is stored,
+     * and keep looping while the read SUCCEEDS; any other status, including
+     * APR_EOF, ends the input.
+     */
+    while (apr_file_read(fin, &c, &bytes_read) == APR_SUCCESS
+           && bytes_read == 1) {
+        t = c;
+        if(t == '\n') {
+            if (p == lbend) {
+                command_errf(commands, SEDERR_CLTL, commands->linebuf);
+                return -1;
+            }
+            *p = '\0';
+            return(1);
+        }
+        if (p < lbend)
+            *p++ = t;
+        if(t == '\\') {
+            /* copy the escaped character unconditionally */
+            bytes_read = 1;
+            if (apr_file_read(fin, &c, &bytes_read) != APR_SUCCESS
+                || bytes_read != 1) {
+                return -1;
+            }
+            if(p < lbend)
+                *p++ = c;
+        }
+        bytes_read = 1;
+    }
+    return(-1);
+}
+
+/*
+ * address
+ * Compile one address found at commands->cp into expbuf.
+ *
+ *   '$'            -> CEND, CCEOF
+ *   /re/ or \cREc  -> regular expression compiled by comple()
+ *   decimal number -> CLNUM, index into commands->tlno, CCEOF
+ *
+ * Returns the position in expbuf just past the compiled address.  Returns
+ * NULL with *status == APR_SUCCESS when no address is present at
+ * commands->cp, and NULL with *status == APR_EGENERAL when an address was
+ * present but could not be compiled.
+ */
+static char *address(sed_commands_t *commands, char *expbuf,
+                     apr_status_t* status)
+{
+    char   *rcp;
+    apr_int64_t lno;
+    sed_comp_args compargs;
+
+    *status = APR_SUCCESS;
+    if(*commands->cp == '$') {
+        /* two bytes are stored below */
+        if (expbuf > &commands->respace[RESIZE-2]) {
+            command_errf(commands, SEDERR_TMMES, commands->linebuf);
+            *status = APR_EGENERAL;
+            return NULL;
+        }
+        commands->cp++;
+        *expbuf++ = CEND;
+        *expbuf++ = CCEOF;
+        return(expbuf);
+    }
+    if (*commands->cp == '/' || *commands->cp == '\\' ) {
+        /* "\c...c" selects c as the delimiter instead of '/' */
+        if ( *commands->cp == '\\' )
+            commands->cp++;
+        commands->sseof = *commands->cp++;
+        rcp = comple(commands, &compargs, (char *) 0, expbuf, commands->reend,
+                     commands->sseof);
+        /*
+         * A NULL result means the expression failed to compile.  Flag it,
+         * otherwise the caller cannot tell it apart from "no address".
+         */
+        if (rcp == NULL)
+            *status = APR_EGENERAL;
+        return(rcp);
+    }
+
+    rcp = commands->cp;
+    lno = 0;
+
+    while(*rcp >= '0' && *rcp <= '9')
+        lno = lno*10 + *rcp++ - '0';
+
+    if(rcp > commands->cp) {
+        /* three bytes are stored below */
+        if (expbuf > &commands->respace[RESIZE-3]) {
+            command_errf(commands, SEDERR_TMMES, commands->linebuf);
+            *status = APR_EGENERAL;
+            return NULL;
+        }
+        *expbuf++ = CLNUM;
+        /* the line number itself lives in tlno[]; only its slot is encoded */
+        *expbuf++ = commands->nlno;
+        commands->tlno[commands->nlno++] = lno;
+        if(commands->nlno >= SED_NLINES) {
+            command_errf(commands, SEDERR_TMLNMES, commands->linebuf);
+            *status = APR_EGENERAL;
+            return NULL;
+        }
+        *expbuf++ = CCEOF;
+        commands->cp = rcp;
+        return(expbuf);
+    }
+    /* nothing that looks like an address */
+    return(NULL);
+}
+
+/*
+ * text
+ * Copy the remainder of the current command (from commands->cp up to and
+ * including the terminating NUL) into textbuf.  A backslash is dropped and
+ * the character after it is taken literally.  Unless S5EMUL is defined,
+ * leading blanks and tabs are stripped at the start of the text and after
+ * every newline.
+ *
+ * tbend is the last position that may be written.  Returns the address
+ * following the stored NUL, with commands->cp left on the source NUL, or
+ * NULL if the text does not fit.
+ */
+static char *text(sed_commands_t *commands, char *textbuf, char *tbend)
+{
+    char *out;
+    char *in = commands->cp;
+    char ch;
+
+#ifndef S5EMUL
+    /* drop indentation in front of the first line */
+    while (*in == '\t' || *in == ' ')
+        in++;
+#endif
+
+    for (out = textbuf; out <= tbend; out++) {
+        ch = *in++;
+        if (ch == '\\')
+            ch = *in++;         /* escaped character is taken literally */
+        *out = ch;
+
+        if (ch == '\0') {
+            commands->cp = in - 1;
+            return out + 1;
+        }
+#ifndef S5EMUL
+        /* drop indentation in front of each continuation line */
+        if (ch == '\n') {
+            while (*in == '\t' || *in == ' ')
+                in++;
+        }
+#endif
+    }
+
+    return NULL;    /* overflowed the buffer */
+}
+
+
+/*
+ * search
+ * Look through the label table, from commands->labtab up to (but not
+ * including) commands->lab, for an entry whose name equals the name stored
+ * in the entry at commands->lab.  Returns the matching entry, or NULL when
+ * there is none.
+ */
+static sed_label_t *search(sed_commands_t *commands)
+{
+    sed_label_t *const wanted = commands->lab;
+    sed_label_t *entry;
+
+    for (entry = commands->labtab; entry < wanted; entry++) {
+        if (strcmp(entry->asc, wanted->asc) == 0)
+            return entry;
+    }
+
+    return NULL;
+}
+
+/*
+ * ycomp
+ * Compile a 'y' (transliterate) command.  commands->cp points at the first
+ * source character and commands->sseof holds the delimiter.  A 0400-byte
+ * translation table is built at expbuf: entry i holds the character that
+ * byte value i is mapped to, and unmapped bytes map to themselves.  The
+ * two-character sequence "\n" stands for a newline in either string.
+ *
+ * On success commands->cp is left just past the closing delimiter and
+ * expbuf + 0400 is returned.  On error (no room in respace, unterminated
+ * string, or strings of different length) the error is reported through
+ * command_errf and NULL is returned.
+ */
+static char *ycomp(sed_commands_t *commands, char *expbuf)
+{
+    char    c;
+    int cint; /* table index: c taken as an unsigned byte value */
+    char *ep, *tsp;
+    int i;
+    char    *sp;
+
+    ep = expbuf;
+    if(ep + 0377 > &commands->respace[RESIZE-1]) {
+        command_errf(commands, SEDERR_TMMES, commands->linebuf);
+        return NULL;
+    }
+    sp = commands->cp;
+    /* locate the delimiter that ends the source string */
+    for(tsp = commands->cp; (c = *tsp) != commands->sseof; tsp++) {
+        if(c == '\\') {
+            tsp++;
+            /* defensive: never step over the terminating NUL */
+            if(*tsp == '\0') {
+                command_errf(commands, SEDERR_EDMOSTR, commands->linebuf);
+                return NULL;
+            }
+        }
+        if(c == '\0' || c == '\n') {
+            command_errf(commands, SEDERR_EDMOSTR, commands->linebuf);
+            return NULL;
+        }
+    }
+    tsp++;      /* tsp now walks the destination string */
+    memset(ep, 0, 0400);
+
+    while((c = *sp++) != commands->sseof) {
+        if(c == '\\' && *sp == 'n') {
+            sp++;
+            c = '\n';
+        }
+        /*
+         * Index with the unsigned byte value: a plain char may be signed,
+         * and a negative index would write in front of the table.
+         */
+        cint = (unsigned char) c;
+        if((ep[cint] = *tsp++) == '\\' && *tsp == 'n') {
+            ep[cint] = '\n';
+            tsp++;
+        }
+        if(ep[cint] == commands->sseof || ep[cint] == '\0') {
+            /* destination string is shorter than the source string */
+            command_errf(commands, SEDERR_TSNTSS, commands->linebuf);
+            return NULL;
+        }
+    }
+    if(*tsp != commands->sseof) {
+        if(*tsp == '\0') {
+            command_errf(commands, SEDERR_EDMOSTR, commands->linebuf);
+        }
+        else {
+            command_errf(commands, SEDERR_TSNTSS, commands->linebuf);
+        }
+        return NULL;
+    }
+    commands->cp = ++tsp;
+
+    /* bytes without an explicit mapping translate to themselves */
+    for(i = 0; i < 0400; i++)
+        if(ep[i] == 0)
+            ep[i] = i;
+
+    return(ep + 0400);
+}
+
+/*
+ * comple
+ * Compile a regular expression with sed_compile(), leaving one byte free at
+ * ep in front of the compiled form.  If sed_compile() stored something (its
+ * result differs from ep + 1), that byte receives compargs->circf and the
+ * sed_compile() result is returned.  If nothing was stored, ep itself is
+ * returned.  x1 is not used.
+ */
+static char *comple(sed_commands_t *commands, sed_comp_args *compargs,
+                    char *x1, char *ep, char *x3, char x4)
+{
+    char *const body = ep + 1;
+    char *const end = sed_compile(commands, compargs, body, x3, x4);
+
+    if (end != body) {
+        *ep = compargs->circf;
+        return end;
+    }
+    return ep;
+}
+
+/*
+ * alloc_reptr
+ * Allocate a zero-filled sed_reptr_t from commands->pool, number it with
+ * the current commands->nrep (which is then incremented), mark it as having
+ * no file index, and append it to the list that starts at commands->ptrspace
+ * and ends at commands->ptrend.  The first label table entry's address is
+ * pointed at the new node as well.  Returns the node, or NULL after
+ * reporting SEDERR_OOMMES if the allocation failed.
+ */
+static sed_reptr_t *alloc_reptr(sed_commands_t *commands)
+{
+    sed_reptr_t *node = apr_pcalloc(commands->pool, sizeof(*node));
+
+    if (!node) {
+        command_errf(commands, SEDERR_OOMMES);
+        return NULL;
+    }
+
+    node->findex = -1;
+    node->nrep = commands->nrep++;
+
+    /* link the node at the tail of the command list */
+    if (commands->ptrspace != NULL)
+        commands->ptrend->next = node;
+    else
+        commands->ptrspace = node;
+    commands->ptrend = node;
+
+    commands->labtab->address = node;
+    return node;
+}
+
+



Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by "William A. Rowe, Jr." <wr...@rowe-clan.net>.
Nick Kew wrote:
> On Thu, 04 Sep 2008 21:47:26 -0500
> "William A. Rowe, Jr." <wr...@rowe-clan.net> wrote:
> 
>> Basant Kukreja wrote:
>>> Based on your suggestion, I will check what are the other
>>> improvements from mod_substitute can be brought into mod_sed.
>> Note that mod_substitute's brigade handling is already based on the
>> work of both Jim and Nick (author of mod_line_edit) - so they are
>> pretty certain that it is the right approach.  Good idea to borrow
>> from it.
> 
> Um - make that [everyone].  mod_substitute got pulled apart a bit
> and optimised when Jim dropped it in to trunk, and ISTR several
> folks contributing to that.
> 
> Hopefully Rudiger's comments are just the start of the same
> process of refinement on mod_sed.

+1; didn't mean to slight [anyone] - thanks for raising that, Nick :)

Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by Nick Kew <ni...@webthing.com>.
On Thu, 04 Sep 2008 21:47:26 -0500
"William A. Rowe, Jr." <wr...@rowe-clan.net> wrote:

> Basant Kukreja wrote:
> > 
> > Based on your suggestion, I will check what are the other
> > improvements from mod_substitute can be brought into mod_sed.
> 
> Note that mod_substitute's brigade handling is already based on the
> work of both Jim and Nick (author of mod_line_edit) - so they are
> pretty certain that it is the right approach.  Good idea to borrow
> from it.

Um - make that [everyone].  mod_substitute got pulled apart a bit
and optimised when Jim dropped it in to trunk, and ISTR several
folks contributing to that.

Hopefully Rudiger's comments are just the start of the same
process of refinement on mod_sed.

-- 
Nick Kew

Application Development with Apache - the Apache Modules Book
http://www.apachetutor.org/

Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by Basant Kumar kukreja <Ba...@Sun.COM>.
* Transient bucket seems to be working fine in mod_sed.
* Added error handling code so that if ap_pass_brigade fails during
  request processing, error is returned to sed_response_filter /
  sed_request_filter.

Testing :
* Compiled with the 2.2 branch and made sure there is no regression (against
gsed test cases).
* Compiled with trunk.

Final patch is attached.

Regards,
Basant.



On Mon, Sep 15, 2008 at 12:17:04AM -0700, Basant Kukreja wrote:
> Hi,
> 
> Attached is the *rough* patch which uses transient buckets in mod_sed output
> filter.
> 
> Testing :
>   I created a 30MB and 300MB text files and ran OutputSed commands on the file.
> * Before the patch, process size (worker mpm with 1 thread) increased up to 300M
> for a single request.  After the patch, process size remains at 3MB to serve
> the 300M response output.
> 
> I also removed 1 extra copying for processing output.
> 
> I need to add some more error handling to finalize the patch. Any comments are
> welcome.
> 
> Regards,
> Basant.
> 
> On Thu, Sep 04, 2008 at 09:47:26PM -0500, William A. Rowe, Jr. wrote:
> > Basant Kukreja wrote:
> >>
> >> Based on your suggestion, I will check what are the other improvements from
> >> mod_substitute can be brought into mod_sed.
> >
> > Note that mod_substitute's brigade handling is already based on the work of
> > both Jim and Nick (author of mod_line_edit) - so they are pretty certain
> > that it is the right approach.  Good idea to borrow from it.
> >
> > Bill

> Index: modules/filters/mod_sed.c
> ===================================================================
> --- modules/filters/mod_sed.c	(revision 692768)
> +++ modules/filters/mod_sed.c	(working copy)
> @@ -26,7 +26,8 @@
>  #include "libsed.h"
>  
>  static const char *sed_filter_name = "Sed";
> -#define MODSED_OUTBUF_SIZE 4000
> +#define MODSED_OUTBUF_SIZE 8000
> +#define MAX_TRANSIENT_BUCKETS 50
>  
>  typedef struct sed_expr_config
>  {
> @@ -44,11 +45,14 @@
>  typedef struct sed_filter_ctxt
>  {
>      sed_eval_t eval;
> +    ap_filter_t *f;
>      request_rec *r;
>      apr_bucket_brigade *bb;
>      char *outbuf;
>      char *curoutbuf;
>      int bufsize;
> +    apr_pool_t *tpool;
> +    int numbuckets;
>  } sed_filter_ctxt;
>  
>  module AP_MODULE_DECLARE_DATA sed_module;
> @@ -71,29 +75,68 @@
>      sed_cfg->last_error = error;
>  }
>  
> +/* clear the temporary pool (used for transient buckets)
> + */
> +static void clear_ctxpool(sed_filter_ctxt* ctx)
> +{
> +    apr_pool_clear(ctx->tpool);
> +    ctx->outbuf = NULL;
> +    ctx->curoutbuf = NULL;
> +    ctx->numbuckets = 0;
> +}
> +
> +/* alloc_outbuf
> + * allocate output buffer
> + */
> +static void alloc_outbuf(sed_filter_ctxt* ctx)
> +{
> +    ctx->outbuf = apr_palloc(ctx->tpool, ctx->bufsize + 1);
> +    ctx->curoutbuf = ctx->outbuf;
> +}
> +
> +/* append_bucket
> + * Allocate a new bucket from buf and sz and append to ctx->bb
> + */
> +static void append_bucket(sed_filter_ctxt* ctx, char* buf, int sz)
> +{
> +    int rv;
> +    apr_bucket *b;
> +    if (ctx->tpool == ctx->r->pool) {
> +        /* We are not using transient bucket */
> +        b = apr_bucket_pool_create(buf, sz, ctx->r->pool,
> +                                   ctx->r->connection->bucket_alloc);
> +        APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
> +    }
> +    else {
> +        /* We are using transient bucket */
> +        b = apr_bucket_transient_create(buf, sz,
> +                                        ctx->r->connection->bucket_alloc);
> +        APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
> +        ctx->numbuckets++;
> +        if (ctx->numbuckets >= MAX_TRANSIENT_BUCKETS) {
> +            b = apr_bucket_flush_create(ctx->r->connection->bucket_alloc);
> +            APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
> +            rv = ap_pass_brigade(ctx->f->next, ctx->bb);
> +            apr_brigade_cleanup(ctx->bb);
> +            clear_ctxpool(ctx);
> +        }
> +    }
> +}
> +
>  /*
>   * flush_output_buffer
>   * Flush the  output data (stored in ctx->outbuf)
>   */
> -static void flush_output_buffer(sed_filter_ctxt *ctx, char* buf, int sz)
> +static void flush_output_buffer(sed_filter_ctxt *ctx)
>  {
>      int size = ctx->curoutbuf - ctx->outbuf;
>      char *out;
> -    apr_bucket *b;
> -    if (size + sz <= 0)
> +    if ((ctx->outbuf == NULL) || (size <=0))
>          return;
> -    out = apr_palloc(ctx->r->pool, size + sz);
> -    if (size) {
> -        memcpy(out, ctx->outbuf, size);
> -    }
> -    if (buf && (sz > 0)) {
> -        memcpy(out + size, buf, sz);
> -    }
> -    /* Reset the output buffer position */
> +    out = apr_palloc(ctx->tpool, size);
> +    memcpy(out, ctx->outbuf, size);
> +    append_bucket(ctx, out, size);
>      ctx->curoutbuf = ctx->outbuf;
> -    b = apr_bucket_pool_create(out, size + sz, ctx->r->pool,
> -                               ctx->r->connection->bucket_alloc);
> -    APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
>  }
>  
>  /* This is a call back function. When libsed wants to generate the output,
> @@ -104,11 +147,38 @@
>      /* dummy is basically filter context. Context is passed during invocation
>       * of sed_eval_buffer
>       */
> +    int remainbytes = 0;
>      sed_filter_ctxt *ctx = (sed_filter_ctxt *) dummy;
> -    if (((ctx->curoutbuf - ctx->outbuf) + sz) >= ctx->bufsize) {
> -        /* flush current buffer */
> -        flush_output_buffer(ctx, buf, sz);
> +    if (ctx->outbuf == NULL) {
> +        alloc_outbuf(ctx);
>      }
> +    remainbytes = ctx->bufsize - (ctx->curoutbuf - ctx->outbuf);
> +    if (sz >= remainbytes) {
> +        if (remainbytes > 0) {
> +            memcpy(ctx->curoutbuf, buf, remainbytes);
> +            buf += remainbytes;
> +            sz -= remainbytes;
> +            ctx->curoutbuf += remainbytes;
> +        }
> +        /* buffer is now full */
> +        append_bucket(ctx, ctx->outbuf, ctx->bufsize);
> +        /* old buffer is now used so allocate new buffer */
> +        alloc_outbuf(ctx);
> +        /* if size is bigger than the allocated buffer directly add to output brigade */
> +        if (sz >= ctx->bufsize) {
> +            char* newbuf = apr_palloc(ctx->tpool, sz);
> +            memcpy(newbuf, buf, sz);
> +            append_bucket(ctx, newbuf, sz);
> +            /* pool might get clear after append_bucket */
> +            if (ctx->outbuf == NULL) {
> +                alloc_outbuf(ctx);
> +            }
> +        }
> +        else {
> +            memcpy(ctx->curoutbuf, buf, sz);
> +            ctx->curoutbuf += sz;
> +        }
> +    }
>      else {
>          memcpy(ctx->curoutbuf, buf, sz);
>          ctx->curoutbuf += sz;
> @@ -153,10 +223,11 @@
>  
>  /* Initialize sed filter context. If successful then context is set in f->ctx
>   */
> -static apr_status_t init_context(ap_filter_t *f, sed_expr_config *sed_cfg)
> +static apr_status_t init_context(ap_filter_t *f, sed_expr_config *sed_cfg, int usetpool)
>  {
>      apr_status_t status;
>      sed_filter_ctxt* ctx;
> +    apr_pool_t *tpool;
>      request_rec *r = f->r;
>      /* Create the context. Call sed_init_eval. libsed will generated
>       * output by calling sed_write_output and generates any error by
> @@ -165,6 +236,8 @@
>      ctx = apr_pcalloc(r->pool, sizeof(sed_filter_ctxt));
>      ctx->r = r;
>      ctx->bb = NULL;
> +    ctx->numbuckets = 0;
> +    ctx->f = f;
>      status = sed_init_eval(&ctx->eval, sed_cfg->sed_cmds, log_sed_errf,
>                             r, &sed_write_output, r->pool);
>      if (status != APR_SUCCESS) {
> @@ -173,8 +246,13 @@
>      apr_pool_cleanup_register(r->pool, &ctx->eval, sed_eval_cleanup,
>                                apr_pool_cleanup_null);
>      ctx->bufsize = MODSED_OUTBUF_SIZE;
> -    ctx->outbuf = apr_palloc(r->pool, ctx->bufsize + 1);
> -    ctx->curoutbuf = ctx->outbuf;
> +    if (usetpool) {
> +        apr_pool_create(&(ctx->tpool), r->pool);
> +    }
> +    else {
> +        ctx->tpool = r->pool;
> +    }
> +    alloc_outbuf(ctx);
>      f->ctx = ctx;
>      return APR_SUCCESS;
>  }
> @@ -204,10 +282,11 @@
>              return ap_pass_brigade(f->next, bb);
>          }
>  
> -        status = init_context(f, sed_cfg);
> +        status = init_context(f, sed_cfg, 1);
>          if (status != APR_SUCCESS)
>               return status;
>          ctx = f->ctx;
> +        apr_table_unset(f->r->headers_out, "Content-Length");
>      }
>  
>      ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
> @@ -239,7 +318,7 @@
>              apr_bucket *b1 = APR_BUCKET_NEXT(b);
>              /* Now clean up the internal sed buffer */
>              sed_finalize_eval(&ctx->eval, ctx);
> -            flush_output_buffer(ctx, NULL, 0);
> +            flush_output_buffer(ctx);
>              APR_BUCKET_REMOVE(b);
>              /* Insert the eos bucket to ctx->bb brigade */
>              APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
> @@ -248,12 +327,8 @@
>          else if (APR_BUCKET_IS_FLUSH(b)) {
>              apr_bucket *b1 = APR_BUCKET_NEXT(b);
>              APR_BUCKET_REMOVE(b);
> +            flush_output_buffer(ctx);
>              APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
> -            status = ap_pass_brigade(f->next, ctx->bb);
> -            apr_brigade_cleanup(ctx->bb);
> -            if (status != APR_SUCCESS) {
> -                return status;
> -            }
>              b = b1;
>          }
>          else if (APR_BUCKET_IS_METADATA(b)) {
> @@ -264,9 +339,9 @@
>              apr_bucket *b1 = APR_BUCKET_NEXT(b);
>              status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
>              if (status != APR_SUCCESS) {
> +                clear_ctxpool(ctx);
>                  return status;
>              }
> -            flush_output_buffer(ctx, NULL, 0);
>              APR_BUCKET_REMOVE(b);
>              apr_bucket_delete(b);
>              b = b1;
> @@ -278,7 +353,14 @@
>          }
>      }
>      apr_brigade_cleanup(bb);
> -    return ap_pass_brigade(f->next, ctx->bb);
> +    flush_output_buffer(ctx);
> +    status = APR_SUCCESS;
> +    if (!APR_BRIGADE_EMPTY(ctx->bb)) {
> +        status = ap_pass_brigade(f->next, ctx->bb);
> +        apr_brigade_cleanup(ctx->bb);
> +    }
> +    clear_ctxpool(ctx);
> +    return status;
>  }
>  
>  /* Entry function for Sed input filter */
> @@ -309,7 +391,7 @@
>              /* XXX : Should we filter the sub requests too */
>              return ap_get_brigade(f->next, bb, mode, block, readbytes);
>          }
> -        status = init_context(f, sed_cfg);
> +        status = init_context(f, sed_cfg, 0);
>          if (status != APR_SUCCESS)
>               return status;
>          ctx = f->ctx;
> @@ -352,7 +434,7 @@
>              if (APR_BUCKET_IS_EOS(b)) {
>                  /* eos bucket. Clear the internal sed buffers */
>                  sed_finalize_eval(&ctx->eval, ctx);
> -                flush_output_buffer(ctx, NULL, 0);
> +                flush_output_buffer(ctx);
>                  APR_BUCKET_REMOVE(b);
>                  APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
>                  break;
> @@ -366,7 +448,7 @@
>                  status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
>                  if (status != APR_SUCCESS)
>                      return status;
> -                flush_output_buffer(ctx, NULL, 0);
> +                flush_output_buffer(ctx);
>              }
>          }
>          apr_brigade_cleanup(bbinp);


Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by Basant Kukreja <Ba...@Sun.COM>.
Hi,

Attached is the *rough* patch which uses transient buckets in mod_sed output
filter.

Testing :
  I created 30MB and 300MB text files and ran OutputSed commands on them.
* Before the patch, the process size (worker mpm with 1 thread) increased up to
300M for a single request.  After the patch, the process size stays at 3MB
while serving a 300M response.

I also removed one extra copy when processing the output.

I need to add some more error handling to finalize the patch. Any comments are
welcome.

Regards,
Basant.

On Thu, Sep 04, 2008 at 09:47:26PM -0500, William A. Rowe, Jr. wrote:
> Basant Kukreja wrote:
>>
>> Based on your suggestion, I will check what are the other improvements from
>> mod_substitute can be brought into mod_sed.
>
> Note that mod_substitute's brigade handling is already based on the work of
> both Jim and Nick (author of mod_line_edit) - so they are pretty certain
> that it is the right approach.  Good idea to borrow from it.
>
> Bill

Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by "William A. Rowe, Jr." <wr...@rowe-clan.net>.
Basant Kukreja wrote:
> 
> Based on your suggestion, I will check what are the other improvements from
> mod_substitute can be brought into mod_sed.

Note that mod_substitute's brigade handling is already based on the work of
both Jim and Nick (author of mod_line_edit) - so they are pretty certain
that it is the right approach.  Good idea to borrow from it.

Bill

Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by Basant Kukreja <Ba...@Sun.COM>.
>> +           OutputSed "s/monday/MON/g" <br />
>> +           OutputSed "s/sunday/SUN/g" <br />
>
> I guess it should be InputSed here.
>
You are right. It is a mistake.

>> +static const char *sed_filter_name = "Sed";
>> +#define MODSED_OUTBUF_SIZE 4000
>
> Why no using 8195 here? This would create a buffer with the size of a whole page
> on most platforms or a multiple thereof.
>
I agree that (PAGESIZE - 1) would be a better choice.

Based on your suggestion, I will check which other improvements from
mod_substitute can be brought into mod_sed.

Regards,
Basant.


Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by Basant Kumar kukreja <Ba...@Sun.COM>.
>> +           OutputSed "s/sunday/SUN/g" <br />
>
> I guess it should be InputSed here.
>

Patch for the documentation fix is attached.

Regards,
Basant.

$ svn diff mod_sed.xml
Index: mod_sed.xml
===================================================================
--- mod_sed.xml (revision 692275)
+++ mod_sed.xml (working copy)
@@ -110,8 +110,8 @@
         &lt;Directory "/var/www/docs/sed"&gt; <br />
            <indent>
            AddInputFilter Sed php <br />
-           OutputSed "s/monday/MON/g" <br />
-           OutputSed "s/sunday/SUN/g" <br />
+           InputSed "s/monday/MON/g" <br />
+           InputSed "s/sunday/SUN/g" <br />
            </indent>
         &lt;/Directory&gt; <br />
         </indent>

Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by Nick Kew <ni...@webthing.com>.
On Thu, 4 Sep 2008 10:12:39 -0400
Jim Jagielski <ji...@jaguNET.com> wrote:

> 
> On Sep 4, 2008, at 6:38 AM, Ruediger Pluem wrote:
> >
> > More comments possibly later.
> >
> 
> For the most part, it looks like many of the optimizations
> in mod_substitute, esp regarding efficient use of buckets,
> is lacking in mod_sed...

Anecdotal and benchmark data suggest that it's competitive
in performance terms.  Now that it's in svn, we can further
improve it.  Ruediger's comments look like a start on that.

-- 
Nick Kew

Application Development with Apache - the Apache Modules Book
http://www.apachetutor.org/

Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by Jim Jagielski <ji...@jaguNET.com>.
On Sep 4, 2008, at 6:38 AM, Ruediger Pluem wrote:
>
> More comments possibly later.
>

For the most part, it looks like many of the optimizations
in mod_substitute, esp. regarding efficient use of buckets,
are lacking in mod_sed...


Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by "Roy T. Fielding" <fi...@gbiv.com>.
On Sep 5, 2008, at 4:04 PM, Basant Kumar kukreja wrote:

> Just a note : sed original code also have AT&T copyright.
> http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/ 
> ucbcmd/sed/sed1.c
> http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/ 
> ucbcmd/sed/sed0.c
> http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/ 
> ucbcmd/sed/sed.h
>
> Regards,
> Basant.
>
> On Thu, Sep 04, 2008 at 09:20:43AM -0400, Jim Jagielski wrote:
>> Apologies if this was already discussed and resolved, but I
>> see quite a number of:
>>
>>  *      Copyright (c) 1984 AT&T
>>  *        All Rights Reserved
>>
>> in various files... Can we track the IP of those parts to
>> ensure that we (and Sun) have the required license to use
>> them??

FTR, it is only necessary that Sun do the diligence -- we rely on them
to do so as part of the contribution process.  Since this is code from
the OpenSolaris gate, I know that Sun has a defined legal process
to vet their code before publication.  From the headers, it looks like
this was part of the Unix system licensed to Sun during the big
System 5 deal with AT&T, and thus I see no problem with accepting the
contribution from Sun.

However, our license text should be on top of each file, not after the
copyright notices.  I will fix that when I get a chance.

....Roy


Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by Basant Kumar kukreja <Ba...@Sun.COM>.
Just a note: the original sed code also has an AT&T copyright.
http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/ucbcmd/sed/sed1.c
http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/ucbcmd/sed/sed0.c
http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/ucbcmd/sed/sed.h

Regards,
Basant.

On Thu, Sep 04, 2008 at 09:20:43AM -0400, Jim Jagielski wrote:
> Apologies if this was already discussed and resolved, but I
> see quite a number of:
>
>  *      Copyright (c) 1984 AT&T
>  *        All Rights Reserved
>
> in various files... Can we track the IP of those parts to
> ensure that we (and Sun) have the required license to use
> them??

Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by Jim Jagielski <ji...@jaguNET.com>.
Apologies if this was already discussed and resolved, but I
see quite a number of:

  *      Copyright (c) 1984 AT&T
  *        All Rights Reserved

in various files... Can we track the IP of those parts to
ensure that we (and Sun) have the required license to use
them??

Re: svn commit: r691418 [1/2] - in /httpd/httpd/trunk: ./ docs/manual/mod/ modules/filters/

Posted by Ruediger Pluem <rp...@apache.org>.

On 09/03/2008 01:01 AM, niq@apache.org wrote:
> Author: niq
> Date: Tue Sep  2 16:01:47 2008
> New Revision: 691418
> 
> URL: http://svn.apache.org/viewvc?rev=691418&view=rev
> Log:
> Commit mod_sed: enable filtering of HTTP Requests and Responses through sed
> 
> Added:
>     httpd/httpd/trunk/docs/manual/mod/mod_sed.xml
>     httpd/httpd/trunk/modules/filters/libsed.h
>     httpd/httpd/trunk/modules/filters/mod_sed.c
>     httpd/httpd/trunk/modules/filters/regexp.c
>     httpd/httpd/trunk/modules/filters/regexp.h
>     httpd/httpd/trunk/modules/filters/sed.h
>     httpd/httpd/trunk/modules/filters/sed0.c
>     httpd/httpd/trunk/modules/filters/sed1.c
> Modified:
>     httpd/httpd/trunk/CHANGES
>     httpd/httpd/trunk/modules/filters/config.m4
> 
> Modified: httpd/httpd/trunk/CHANGES

> Added: httpd/httpd/trunk/docs/manual/mod/mod_sed.xml
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/docs/manual/mod/mod_sed.xml?rev=691418&view=auto
> ==============================================================================
> --- httpd/httpd/trunk/docs/manual/mod/mod_sed.xml (added)
> +++ httpd/httpd/trunk/docs/manual/mod/mod_sed.xml Tue Sep  2 16:01:47 2008
> @@ -0,0 +1,141 @@
> +<?xml version="1.0"?>
> +<!DOCTYPE modulesynopsis SYSTEM "../style/modulesynopsis.dtd">
> +<?xml-stylesheet type="text/xsl" href="../style/manual.en.xsl"?>
> +<!--
> + Licensed to the Apache Software Foundation (ASF) under one or more
> + contributor license agreements.  See the NOTICE file distributed with
> + this work for additional information regarding copyright ownership.
> + The ASF licenses this file to You under the Apache License, Version 2.0
> + (the "License"); you may not use this file except in compliance with
> + the License.  You may obtain a copy of the License at
> +
> +     http://www.apache.org/licenses/LICENSE-2.0
> +
> + Unless required by applicable law or agreed to in writing, software
> + distributed under the License is distributed on an "AS IS" BASIS,
> + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + See the License for the specific language governing permissions and
> + limitations under the License.
> +-->
> +
> +<modulesynopsis metafile="mod_sed.xml.meta">
> +
> +<name>mod_sed</name>
> +<description>Filtering Input (request) and Output (response) content using sed commands</description>
> +<status>Experimental</status>
> +<sourcefile>mod_sed.c sed0.c sed1.c regexp.c regexp.h sed.h</sourcefile>
> +<identifier>sed_module</identifier>
> +<compatibility>Available in Apache 2.3 and later</compatibility>
> +
> +<summary>
> +<p>
> +mod_sed is a in-process content filter. mod_sed filter implement the sed edit
> +commands implemented by Solaris 10 sed
> +program as described in <a href="http://docs.sun.com/app/docs/doc/816-5165/sed-1b?a=view">man
> +page</a>. However unlike sed, mod_sed doesn't take data from
> +standard
> +input. Instead filter act on the entity data sent between client and
> +server. mod_sed can be used as a input or output filter. mod_sed is a
> +content filter which means that it can not be used to modify client or
> +server http headers.
> +</p>
> +<p>
> +mod_sed output filter accept a chunk of data and execute the sed scripts on data and generates the output which is passed to next filter in the filter chain.
> +</p>
> +
> +<p>
> +mod_sed input filter reads the data from next filter in filter chain and executes the sed scripts and returns the generated data to caller filter in the filter chain.
> +</p>
> +
> +<p>
> +Both input and output filter only process the data if new line character is seen in the content. At the end of the data, rest of the data is treated as last line.
> +</p>
> +
> +<p>A tutorial article on mod_sed, and why it is more powerful than simple
> +string or regular expression search and replace, is available in <a
> +href="http://blogs.sun.com/basant/entry/using_mod_sed_to_filter">on
> +the author's blog</a>.</p>
> +
> +</summary>
> +
> +<directivesynopsis>
> +<name>OutputSed</name>
> +<description>Sed command for filter the response content</description>
> +<syntax>OutputSed <var>sed-command</var></syntax>
> +<contextlist><context>directory</context><context>.htaccess</context>
> +</contextlist>
> +
> +<usage>
> +    <p>The <directive>OutputSed</directive> directive specify the sed
> +    command which will be executed on the response.
> +    </p>
> +</usage>
> +</directivesynopsis>
> +
> +<directivesynopsis>
> +<name>InputSed</name>
> +<description>Sed command to filter the request data (typically post data)</description>
> +<syntax>InputSed <var>sed-command</var></syntax>
> +<contextlist><context>directory</context><context>.htaccess</context>
> +</contextlist>
> +
> +<usage>
> +    <p>The <directive>InputSed</directive> directive specify the sed command
> +    which will be executed on the request data e.g POST data.
> +    </p>
> +</usage>
> +</directivesynopsis>
> +
> +<section id="sampleconf"><title>Sample Configuration</title>
> +    <example><title>Adding a output filter </title>
> +         # In following example, sed filter will replace the string <br />
> +         # "monday" to "MON" and the string "sunday" to SUN in html document <br />
> +         # before sending to client. <br />
> +        <indent>
> +        &lt;Directory "/var/www/docs/sed"&gt; <br />
> +           <indent>
> +           AddOutputFilter Sed html <br />
> +           OutputSed "s/monday/MON/g" <br />
> +           OutputSed "s/sunday/SUN/g" <br />
> +           </indent>
> +        &lt;/Directory&gt; <br />
> +        </indent>
> +    </example>
> +
> +    <example><title>Adding a input filter </title>
> +         # In following example, sed filter will replace the string <br />
> +         # "monday" to "MON" and the string "sunday" to SUN in the POST data <br />
> +         # sent to php <br />
> +        <indent>
> +        &lt;Directory "/var/www/docs/sed"&gt; <br />
> +           <indent>
> +           AddInputFilter Sed php <br />
> +           OutputSed "s/monday/MON/g" <br />
> +           OutputSed "s/sunday/SUN/g" <br />

I guess it should be InputSed here.

> +           </indent>
> +        &lt;/Directory&gt; <br />
> +        </indent>
> +    </example>
> +</section>
> +<section id="sed_commands"><title>Sed Commands</title>
> +    <p>
> +    Complete details of the sed command can be found from
> +   <a href="http://docs.sun.com/app/docs/doc/816-5165/sed-1b?a=view">sed man
> +page</a>.
> +    </p>
> +    <dl>
> +        <dt><code>b</code></dt>
> +        <dd>branch to the label specified (Similar to goto)</dd>
> +        <dt><code>h</code></dt>
> +        <dd>Copy the current line to hold buffer.</dd>
> +        <dt><code>H</code></dt>
> +        <dd>Append the current line to hold buffer.</dd>
> +        <dt><code>g</code></dt>
> +        <dd>Copy the hold buffer into the current line</dd>
> +        <dt><code>G</code></dt>
> +        <dd>Append the hold buffer into the current line</dd>
> +        <dt><code>x</code></dt>
> +        <dd>Swap the content of hold buffer and current line</dd>
> +    </dl>
> +</section>
> +</modulesynopsis>
> 


> Added: httpd/httpd/trunk/modules/filters/mod_sed.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/mod_sed.c?rev=691418&view=auto
> ==============================================================================
> --- httpd/httpd/trunk/modules/filters/mod_sed.c (added)
> +++ httpd/httpd/trunk/modules/filters/mod_sed.c Tue Sep  2 16:01:47 2008
> @@ -0,0 +1,437 @@
> +/*
> + * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
> + * Use is subject to license terms.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *  http://www.apache.org/licenses/LICENSE-2.0. 
> + * 
> + * Unless required by applicable law or agreed to in writing, software 
> + * distributed under the License is distributed on an "AS IS" BASIS, 
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 
> + * or implied. 
> + * See the License for the specific language governing permissions and
> + * limitations under the License. 
> + */
> +
> +#include "httpd.h"
> +#include "http_config.h"
> +#include "http_log.h"
> +#include "apr_strings.h"
> +#include "apr_general.h"
> +#include "util_filter.h"
> +#include "apr_buckets.h"
> +#include "http_request.h"
> +#include "libsed.h"
> +
> +static const char *sed_filter_name = "Sed";
> +#define MODSED_OUTBUF_SIZE 4000

Why not use 8195 here? This would create a buffer with the size of a whole page
on most platforms or a multiple thereof.

> +
> +typedef struct sed_expr_config
> +{
> +    sed_commands_t *sed_cmds;
> +    const char *last_error;
> +} sed_expr_config;
> +
> +typedef struct sed_config
> +{
> +    sed_expr_config output;
> +    sed_expr_config input;
> +} sed_config;
> +
> +/* Context for filter invocation for single HTTP request */
> +typedef struct sed_filter_ctxt
> +{
> +    sed_eval_t eval;
> +    request_rec *r;
> +    apr_bucket_brigade *bb;
> +    char *outbuf;
> +    char *curoutbuf;
> +    int bufsize;
> +} sed_filter_ctxt;
> +
> +module AP_MODULE_DECLARE_DATA sed_module;
> +
> +/* This function will be call back from libsed functions if there is any error
> + * happend during execution of sed scripts
> + */
> +static void log_sed_errf(void *data, const char *error)
> +{
> +    request_rec *r = (request_rec *) data;
> +    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, error);
> +}
> +
> +/* This function will be call back from libsed functions if there is any
> + * compilation error.
> + */
> +static void sed_compile_errf(void *data, const char *error)
> +{
> +    sed_expr_config *sed_cfg = (sed_expr_config *) data;
> +    sed_cfg->last_error = error;
> +}
> +
> +/*
> + * flush_output_buffer
> + * Flush the  output data (stored in ctx->outbuf)
> + */
> +static void flush_output_buffer(sed_filter_ctxt *ctx, char* buf, int sz)
> +{
> +    int size = ctx->curoutbuf - ctx->outbuf;
> +    char *out;
> +    apr_bucket *b;
> +    if (size + sz <= 0)
> +        return;
> +    out = apr_palloc(ctx->r->pool, size + sz);

This happens over and over again. If the text document is large this can lead to excessive
memory consumption. IMHO it would be better to add this buffer to ctx->outbuf (if there is
something to add), create a transient bucket from ctx->outbuf and pass it down the filter
chain. If the following filters cannot process this bucket, they will set it aside. Setting
aside a transient bucket uses the bucket allocator instead of the pool which prevents
excessive memory consumption.

> +    if (size) {
> +        memcpy(out, ctx->outbuf, size);
> +    }
> +    if (buf && (sz > 0)) {
> +        memcpy(out + size, buf, sz);
> +    }
> +    /* Reset the output buffer position */
> +    ctx->curoutbuf = ctx->outbuf;
> +    b = apr_bucket_pool_create(out, size + sz, ctx->r->pool,
> +                               ctx->r->connection->bucket_alloc);
> +    APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
> +}
> +
> +/* This is a call back function. When libsed wants to generate the output,
> + * this function will be invoked.
> + */
> +static void sed_write_output(void *dummy, char *buf, int sz)
> +{
> +    /* dummy is basically filter context. Context is passed during invocation
> +     * of sed_eval_buffer
> +     */
> +    sed_filter_ctxt *ctx = (sed_filter_ctxt *) dummy;
> +    if (((ctx->curoutbuf - ctx->outbuf) + sz) >= ctx->bufsize) {
> +        /* flush current buffer */
> +        flush_output_buffer(ctx, buf, sz);
> +    }
> +    else {
> +        memcpy(ctx->curoutbuf, buf, sz);
> +        ctx->curoutbuf += sz;
> +    }
> +}
> +
> +/* Compile a sed expression. Compiled context is saved in sed_cfg->sed_cmds.
> + * Memory required for compilation context is allocated from cmd->pool.
> + */
> +static apr_status_t compile_sed_expr(sed_expr_config *sed_cfg,
> +                                     cmd_parms *cmd,
> +                                     const char *expr)
> +{
> +    apr_status_t status = APR_SUCCESS;
> +
> +    if (!sed_cfg->sed_cmds) {
> +        sed_commands_t *sed_cmds;
> +        sed_cmds = apr_pcalloc(cmd->pool, sizeof(sed_commands_t));
> +        status = sed_init_commands(sed_cmds, sed_compile_errf, sed_cfg,
> +                                   cmd->pool);
> +        if (status != APR_SUCCESS) {
> +            sed_destroy_commands(sed_cmds);
> +            return status;
> +        }
> +        sed_cfg->sed_cmds = sed_cmds;
> +    }
> +    status = sed_compile_string(sed_cfg->sed_cmds, expr);
> +    if (status != APR_SUCCESS) {
> +        sed_destroy_commands(sed_cfg->sed_cmds);
> +        sed_cfg->sed_cmds = NULL;
> +    }
> +    return status;
> +}
> +
> +/* sed eval cleanup function */
> +static apr_status_t sed_eval_cleanup(void *data)
> +{
> +    sed_eval_t *eval = (sed_eval_t *) data;
> +    sed_destroy_eval(eval);
> +    return APR_SUCCESS;
> +}
> +
> +/* Initialize sed filter context. If successful then context is set in f->ctx
> + */
> +static apr_status_t init_context(ap_filter_t *f, sed_expr_config *sed_cfg)
> +{
> +    apr_status_t status;
> +    sed_filter_ctxt* ctx;
> +    request_rec *r = f->r;
> +    /* Create the context. Call sed_init_eval. libsed will generated
> +     * output by calling sed_write_output and generates any error by
> +     * invoking log_sed_errf.
> +     */
> +    ctx = apr_pcalloc(r->pool, sizeof(sed_filter_ctxt));
> +    ctx->r = r;
> +    ctx->bb = NULL;
> +    status = sed_init_eval(&ctx->eval, sed_cfg->sed_cmds, log_sed_errf,
> +                           r, &sed_write_output, r->pool);
> +    if (status != APR_SUCCESS) {
> +        return status;
> +    }
> +    apr_pool_cleanup_register(r->pool, &ctx->eval, sed_eval_cleanup,
> +                              apr_pool_cleanup_null);
> +    ctx->bufsize = MODSED_OUTBUF_SIZE;
> +    ctx->outbuf = apr_palloc(r->pool, ctx->bufsize + 1);
> +    ctx->curoutbuf = ctx->outbuf;
> +    f->ctx = ctx;
> +    return APR_SUCCESS;
> +}
> +
> +/* Entry function for Sed output filter */
> +static apr_status_t sed_response_filter(ap_filter_t *f,
> +                                        apr_bucket_brigade *bb)
> +{
> +    apr_bucket *b;
> +    apr_status_t status;
> +    sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
> +                                           &sed_module);
> +    sed_filter_ctxt *ctx = f->ctx;
> +    sed_expr_config *sed_cfg = &cfg->output;
> +
> +    if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
> +        /* No sed expressions */
> +        ap_remove_output_filter(f);
> +        return ap_pass_brigade(f->next, bb);
> +    }
> +
> +    if (ctx == NULL) {
> +
> +        if (APR_BUCKET_IS_EOS(APR_BRIGADE_FIRST(bb))) {
> +            /* no need to run sed filter for Head requests */
> +            ap_remove_output_filter(f);
> +            return ap_pass_brigade(f->next, bb);
> +        }
> +
> +        status = init_context(f, sed_cfg);
> +        if (status != APR_SUCCESS)
> +             return status;
> +        ctx = f->ctx;
> +    }
> +
> +    ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);

This should be only done once when we create the context. In the next iteration
we should only call apr_brigade_cleanup on ctx->bb.


> +
> +    /* Here is the main logic. Iterate through all the buckets, read the
> +     * content of the bucket, call sed_eval_buffer on the data.
> +     * sed_eval_buffer will read the data line by line, run filters on each
> +     * line. sed_eval_buffer will generates the output by calling
> +     * sed_write_output which will add the output to ctx->bb. At the end of
> +     * the loop, ctx->bb is passed to the next filter in chain. At the end of
> +     * the data, if new line is not found then sed_eval_buffer will store the
> +     * data in it's own buffer.
> +     *
> +     * Once eos bucket is found then sed_finalize_eval will flush the rest of
> +     * the data. If there is no new line in last line of data, new line is
> +     * appended (that is a solaris sed behavior). libsed's internal memory for
> +     * evaluation is allocated on request's pool so it will be cleared once
> +     * request is over.
> +     *
> +     * If flush bucket is found then append the the flush bucket to ctx->bb
> +     * and pass it to next filter. There may be some data which will still be
> +     * in sed's internal buffer which can't be flushed until new line
> +     * character is arrived.
> +     */
> +    for (b = APR_BRIGADE_FIRST(bb); b != APR_BRIGADE_SENTINEL(bb);) {
> +        const char *buf = NULL;
> +        apr_size_t bytes = 0;
> +        if (APR_BUCKET_IS_EOS(b)) {
> +            apr_bucket *b1 = APR_BUCKET_NEXT(b);
> +            /* Now clean up the internal sed buffer */
> +            sed_finalize_eval(&ctx->eval, ctx);
> +            flush_output_buffer(ctx, NULL, 0);
> +            APR_BUCKET_REMOVE(b);
> +            /* Insert the eos bucket to ctx->bb brigade */
> +            APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
> +            b = b1;
> +        }
> +        else if (APR_BUCKET_IS_FLUSH(b)) {
> +            apr_bucket *b1 = APR_BUCKET_NEXT(b);
> +            APR_BUCKET_REMOVE(b);
> +            APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
> +            status = ap_pass_brigade(f->next, ctx->bb);
> +            apr_brigade_cleanup(ctx->bb);
> +            if (status != APR_SUCCESS) {
> +                return status;
> +            }
> +            b = b1;
> +        }
> +        else if (APR_BUCKET_IS_METADATA(b)) {
> +            b = APR_BUCKET_NEXT(b);

Dropping META_BUCKETS is wrong and dangerous. If you can't handle them just pass them
along down the chain unhandled. Maybe someone else knows how to deal with them.

> +        }
> +        else if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
> +                 == APR_SUCCESS) {
> +            apr_bucket *b1 = APR_BUCKET_NEXT(b);
> +            status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
> +            if (status != APR_SUCCESS) {
> +                return status;
> +            }
> +            flush_output_buffer(ctx, NULL, 0);
> +            APR_BUCKET_REMOVE(b);

apr_bucket_delete is enough.

> +            apr_bucket_delete(b);
> +            b = b1;
> +        }
> +        else {
> +            apr_bucket *b1 = APR_BUCKET_NEXT(b);
> +            APR_BUCKET_REMOVE(b);
> +            b = b1;
> +        }
> +    }
> +    apr_brigade_cleanup(bb);
> +    return ap_pass_brigade(f->next, ctx->bb);

Passing the brigade only at the end of the loop can be a bad thing. Think of a brigade that
contains just one file bucket with a text file that is 2 GB large. In this case we will consume
2 GB of main memory. See also http://httpd.apache.org/docs/trunk/en/developer/output-filters.html#filtering

> +}
> +
> +/* Entry function for Sed input filter */
> +static apr_status_t sed_request_filter(ap_filter_t *f,
> +                                       apr_bucket_brigade *bb,
> +                                       ap_input_mode_t mode,
> +                                       apr_read_type_e block,
> +                                       apr_off_t readbytes)
> +{
> +    sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
> +                                           &sed_module);
> +    sed_filter_ctxt *ctx = f->ctx;
> +    apr_status_t status;
> +    sed_expr_config *sed_cfg = &cfg->input;
> +
> +    if (mode != AP_MODE_READBYTES) {

Why don't we remove ourselves from the chain here?

> +        return ap_get_brigade(f->next, bb, mode, block, readbytes);
> +    }
> +
> +    if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
> +        /* No sed expression */

Why don't we remove ourselves from the chain here?

> +        return ap_get_brigade(f->next, bb, mode, block, readbytes);
> +    }
> +
> +    if (!ctx) {
> +        if (!ap_is_initial_req(f->r)) {
> +            ap_remove_input_filter(f);
> +            /* XXX : Should we filter the sub requests too */

Any specific reason why we don't do this on subrequests?

> +            return ap_get_brigade(f->next, bb, mode, block, readbytes);
> +        }
> +        status = init_context(f, sed_cfg);
> +        if (status != APR_SUCCESS)
> +             return status;
> +        ctx = f->ctx;
> +        ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
> +    }
> +
> +    /* Here is the logic :
> +     * Read the readbytes data from next level fiter into bbinp. Loop through
> +     * the buckets in bbinp and read the data from buckets and invoke
> +     * sed_eval_buffer on the data. libsed will generate it's output using
> +     * sed_write_output which will add data in ctx->bb. Do it until it have
> +     * atleast one bucket bucket in ctx->bb. At the end of data eos bucket
> +     * should be there.
> +     *
> +     * Once eos bucket is seen, then invoke sed_finalize_eval to clear the
> +     * output. If the last byte of data is not a new line character then sed
> +     * will add a new line to the data that is default sed behaviour. Note
> +     * that using this filter with POST data, caller may not expect this
> +     * behaviour.
> +     *
> +     * If next level fiter generate the flush bucket, we can't do much about
> +     * it. If we want to return the flush bucket in brigade bb (to the caller)
> +     * the question is where to add it?
> +     */
> +    while (APR_BRIGADE_EMPTY(ctx->bb)) {
> +        apr_bucket_brigade *bbinp;
> +        apr_bucket *b;
> +
> +        /* read the bytes from next level filter */
> +        bbinp = apr_brigade_create(f->r->pool, f->c->bucket_alloc);

Bad idea. Create this brigade once, store it in the context and reuse it by cleaning
it with apr_brigade_cleanup.

> +        status = ap_get_brigade(f->next, bbinp, mode, block, readbytes);
> +        if (status != APR_SUCCESS) {
> +            return status;
> +        }
> +        for (b = APR_BRIGADE_FIRST(bbinp); b != APR_BRIGADE_SENTINEL(bbinp);
> +             b = APR_BUCKET_NEXT(b)) {
> +            const char *buf = NULL;
> +            apr_size_t bytes;
> +
> +            if (APR_BUCKET_IS_EOS(b)) {
> +                /* eos bucket. Clear the internal sed buffers */
> +                sed_finalize_eval(&ctx->eval, ctx);
> +                flush_output_buffer(ctx, NULL, 0);
> +                APR_BUCKET_REMOVE(b);
> +                APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
> +                break;
> +            }
> +            else if (APR_BUCKET_IS_FLUSH(b)) {
> +                /* What should we do with flush bucket */
> +                continue;

As above. If you don't understand it pass it along. Maybe somebody else in the chain
knows how to deal with it.
What about other META buckets here? They aren't handled at all.


> +            }
> +            if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
> +                     == APR_SUCCESS) {
> +                status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
> +                if (status != APR_SUCCESS)
> +                    return status;
> +                flush_output_buffer(ctx, NULL, 0);
> +            }
> +        }
> +        apr_brigade_cleanup(bbinp);
> +        apr_brigade_destroy(bbinp);
> +    }
> +
> +    if (!APR_BRIGADE_EMPTY(ctx->bb)) {
> +        apr_bucket_brigade *newbb = NULL;
> +        apr_bucket *b = NULL;
> +
> +        /* This may return APR_INCOMPLETE which should be fine */
> +        apr_brigade_partition(ctx->bb, readbytes, &b);
> +
> +        newbb = apr_brigade_split(ctx->bb, b);

Better use apr_brigade_split_ex here to avoid the creation of another brigade from the
pool.

> +        APR_BRIGADE_CONCAT(bb, ctx->bb);
> +        APR_BRIGADE_CONCAT(ctx->bb, newbb);
> +    }
> +    return APR_SUCCESS;
> +}
> +
> +static const char *sed_add_expr(cmd_parms *cmd, void *cfg, const char *arg)
> +{
> +    int offset = (int) (long) cmd->info;
> +    sed_expr_config *sed_cfg = 
> +                (sed_expr_config *) (((char *) cfg) + offset);
> +    if (compile_sed_expr(sed_cfg, cmd, arg) != APR_SUCCESS) {
> +        return apr_psprintf(cmd->temp_pool,
> +                            "Failed to compile sed expression. %s",
> +                            sed_cfg->last_error);
> +    }
> +    return NULL;
> +}
> +
> +static void *create_sed_dir_config(apr_pool_t *p, char *s)
> +{
> +    sed_config *cfg = apr_pcalloc(p, sizeof(sed_config));
> +    return cfg;
> +}
> +
> +static const command_rec sed_filter_cmds[] = {
> +    AP_INIT_TAKE1("OutputSed", sed_add_expr,
> +                  (void *) APR_OFFSETOF(sed_config, output),
> +                  ACCESS_CONF,
> +                  "Sed regular expression for Response"),
> +    AP_INIT_TAKE1("InputSed", sed_add_expr,
> +                  (void *) APR_OFFSETOF(sed_config, input),
> +                  ACCESS_CONF,
> +                  "Sed regular expression for Request"),
> +    {NULL}
> +};
> +
> +static void register_hooks(apr_pool_t *p)
> +{
> +    ap_register_output_filter(sed_filter_name, sed_response_filter, NULL,
> +                              AP_FTYPE_RESOURCE);
> +    ap_register_input_filter(sed_filter_name, sed_request_filter, NULL,
> +                             AP_FTYPE_RESOURCE);
> +}
> +
> +module AP_MODULE_DECLARE_DATA sed_module = {
> +    STANDARD20_MODULE_STUFF,
> +    create_sed_dir_config,      /* dir config creater */
> +    NULL,                       /* dir merger --- default is to override */
> +    NULL,                       /* server config */
> +    NULL,                       /* merge server config */
> +    sed_filter_cmds,            /* command table */
> +    register_hooks              /* register hooks */
> +};
> 

More comments possibly later.

Regards

Rüdiger