You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by cz...@apache.org on 2009/08/18 16:30:35 UTC

svn commit: r805427 - in /sling/trunk/contrib/extensions/rewriter: ./ src/main/java/org/apache/ src/main/java/org/apache/sling/rewriter/ src/main/java/org/apache/sling/rewriter/impl/

Author: cziegeler
Date: Tue Aug 18 14:30:34 2009
New Revision: 805427

URL: http://svn.apache.org/viewvc?rev=805427&view=rev
Log:
Use new html parser and new serializers which pass the complete html as sax events through the pipeline.

Added:
    sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java   (with props)
    sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java   (with props)
Removed:
    sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/AttributeList.java
    sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/TagTokenizer.java
Modified:
    sling/trunk/contrib/extensions/rewriter/pom.xml
    sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/   (props changed)
    sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/ProcessingComponentConfiguration.java
    sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlGeneratorFactory.java
    sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlSerializerFactory.java
    sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessingComponentConfigurationImpl.java
    sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessorManagerImpl.java

Modified: sling/trunk/contrib/extensions/rewriter/pom.xml
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/pom.xml?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/pom.xml (original)
+++ sling/trunk/contrib/extensions/rewriter/pom.xml Tue Aug 18 14:30:34 2009
@@ -64,7 +64,9 @@
                             org.apache.sling.rewriter
                         </Export-Package>
                         <Private-Package>
-                            org.apache.sling.rewriter.impl
+                            org.apache.sling.rewriter.impl,
+                            org.apache.cocoon.components.serializers.encoding,
+                            org.apache.cocoon.components.serializers.util
                         </Private-Package>
                     </instructions>
                 </configuration>
@@ -74,6 +76,12 @@
 
     <dependencies>
         <dependency>
+            <groupId>commons-lang</groupId>
+            <artifactId>commons-lang</artifactId>
+            <version>2.4</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
             <groupId>org.apache.sling</groupId>
             <artifactId>org.apache.sling.api</artifactId>
             <version>2.0.6</version>
@@ -81,6 +89,12 @@
         </dependency>
         <dependency>
             <groupId>org.apache.sling</groupId>
+            <artifactId>org.apache.sling.commons.html</artifactId>
+            <version>0.9.0-SNAPSHOT</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.sling</groupId>
             <artifactId>org.apache.sling.jcr.resource</artifactId>
             <version>2.0.2-incubator</version>
             <scope>provided</scope>

Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/
------------------------------------------------------------------------------
--- svn:externals (added)
+++ svn:externals Tue Aug 18 14:30:34 2009
@@ -0,0 +1 @@
+cocoon https://svn.apache.org/repos/asf/cocoon/trunk/blocks/cocoon-serializers/cocoon-serializers-charsets/src/main/java/org/apache/cocoon

Modified: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/ProcessingComponentConfiguration.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/ProcessingComponentConfiguration.java?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/ProcessingComponentConfiguration.java (original)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/ProcessingComponentConfiguration.java Tue Aug 18 14:30:34 2009
@@ -16,7 +16,7 @@
  */
 package org.apache.sling.rewriter;
 
-import java.util.Map;
+import org.apache.sling.api.resource.ValueMap;
 
 
 /**
@@ -34,5 +34,5 @@
      * Return the configuration for this component.
      * @return The configuration for this component or an empty map if there is none.
      */
-    Map<String, Object> getConfiguration();
+    ValueMap getConfiguration();
 }

Added: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java?rev=805427&view=auto
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java (added)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java Tue Aug 18 14:30:34 2009
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.rewriter.impl;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.sling.rewriter.ProcessingComponentConfiguration;
+import org.apache.sling.rewriter.ProcessingContext;
+import org.apache.sling.rewriter.Serializer;
+
+/**
+ * <p>A serializer converting XHTML into plain old HTML.</p>
+ *
+ * <p>For configuration options of this serializer, please look at the
+ * {@link XHTMLSerializer} and
+ * {@link org.apache.cocoon.components.serializers.util.EncodingSerializer}.</p>
+ *
+ * <p>Any of the XHTML document types declared or used will be converted into
+ * its HTML 4.01 counterpart, and in addition to those a "compatible" doctype
+ * can be supported to exploit a couple of shortcuts into MSIE's rendering
+ * engine. The values for the <code>doctype-default</code> option can then be:</p>
+ *
+ * <dl>
+ *   <dt>"<code>none</code>"</dt>
+ *   <dd>Not to emit any document type declaration.</dd>
+ *   <dt>"<code>compatible</code>"</dt>
+ *   <dd>The HTML 4.01 Transitional (exploiting MSIE shortcut).</dd>
+ *   <dt>"<code>strict</code>"</dt>
+ *   <dd>The HTML 4.01 Strict document type.</dd>
+ *   <dt>"<code>loose</code>"</dt>
+ *   <dd>The HTML 4.01 Transitional document type.</dd>
+ *   <dt>"<code>frameset</code>"</dt>
+ *   <dd>The HTML 4.01 Frameset document type.</dd>
+ * </dl>
+ *
+ */
+public class HTMLSerializer
+    extends org.apache.cocoon.components.serializers.util.HTMLSerializer
+    implements Serializer {
+
+    /**
+     * @see org.apache.sling.rewriter.Serializer#init(org.apache.sling.rewriter.ProcessingContext, org.apache.sling.rewriter.ProcessingComponentConfiguration)
+     */
+    public void init(ProcessingContext context,
+            ProcessingComponentConfiguration config)
+    throws IOException {
+        String encoding = config.getConfiguration().get("encoding", "UTF-8");
+        try {
+            this.setEncoding(encoding);
+        } catch (UnsupportedEncodingException exception) {
+            throw new IOException("Encoding not supported: " + encoding);
+        }
+
+        this.setIndentPerLevel(config.getConfiguration().get("indent", 0));
+        this.setDoctypeDefault(config.getConfiguration().get("doctype-default", String.class));
+
+        this.setup(context.getRequest());
+        this.setOutputStream(context.getOutputStream());
+    }
+
+    /**
+     * @see org.apache.sling.rewriter.Serializer#dispose()
+     */
+    public void dispose() {
+        // nothing to do
+    }
+}

Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java
------------------------------------------------------------------------------
    svn:keywords = author date id revision rev url

Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlGeneratorFactory.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlGeneratorFactory.java?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlGeneratorFactory.java (original)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlGeneratorFactory.java Tue Aug 18 14:30:34 2009
@@ -16,15 +16,12 @@
  */
 package org.apache.sling.rewriter.impl;
 
-import java.io.CharArrayWriter;
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.PrintWriter;
-import java.io.Writer;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Set;
+import java.io.StringWriter;
 
-import org.apache.sling.commons.osgi.OsgiUtil;
+import org.apache.sling.commons.html.HtmlParser;
 import org.apache.sling.rewriter.Generator;
 import org.apache.sling.rewriter.GeneratorFactory;
 import org.apache.sling.rewriter.ProcessingComponentConfiguration;
@@ -42,136 +39,47 @@
  */
 public class HtmlGeneratorFactory implements GeneratorFactory {
 
-    public static String NAMESPACE = "http://org.apache.sling/rewriter";
-
-    public static String END_SLASH_ATTR = "endSlash";
-
-    public static String QUOTES_ATTR = "quotes";
-
-    public static final String INCLUDE_TAGS_PROPERTY = "includeTags";
-
-    private static final Set<String> DEFAULT_INCLUSION_TAGS;
-    static {
-        DEFAULT_INCLUSION_TAGS = new HashSet<String>();
-        DEFAULT_INCLUSION_TAGS.add("A");
-        DEFAULT_INCLUSION_TAGS.add("/A");
-        DEFAULT_INCLUSION_TAGS.add("IMG");
-        DEFAULT_INCLUSION_TAGS.add("AREA");
-        DEFAULT_INCLUSION_TAGS.add("FORM");
-        DEFAULT_INCLUSION_TAGS.add("BASE");
-        DEFAULT_INCLUSION_TAGS.add("LINK");
-        DEFAULT_INCLUSION_TAGS.add("SCRIPT");
-        DEFAULT_INCLUSION_TAGS.add("/BODY");
-    }
+    /** @scr.reference */
+    private HtmlParser htmlParser;
 
     /**
      * @see org.apache.sling.rewriter.GeneratorFactory#createGenerator()
      */
     public Generator createGenerator() {
-        return new HtmlGenerator();
+        return new HtmlGenerator(htmlParser);
     }
 
-    public static final class HtmlGenerator extends Writer implements Generator {
+    public static final class HtmlGenerator implements Generator {
 
-        /** Internal character buffer */
-        private final CharArrayWriter buffer = new CharArrayWriter(256);
+        private final StringWriter writer;
 
-        /** Tag tokenizer */
-        private final TagTokenizer tokenizer = new TagTokenizer();
+        private final HtmlParser htmlParser;
 
-        /** Tag name buffer */
-        private final CharArrayWriter tagNameBuffer = new CharArrayWriter(30);
-
-        /** Tag name */
-        private String tagName;
-
-        /** Tag inclusion list */
-        private Set<String> tagInclusionSet;
-
-        /** Registered content handler */
         private ContentHandler contentHandler;
 
-        /** Parse state constant */
-        private final static int PS_OUTSIDE = 0;
-
-        /** Parse state constant */
-        private final static int PS_TAG = PS_OUTSIDE + 1;
-
-        /** Parse state constant */
-        private final static int PS_SCRIPT = PS_TAG + 1;
-
-        /** Parse state constant */
-        private final static int PS_COMMENT = PS_SCRIPT + 1;
-
-        /** Parse state constant */
-        private final static int PS_STRING = PS_COMMENT + 1;
-
-        /** Tag type constant */
-        private final static int TT_NONE = 0;
-
-        /** Tag type constant */
-        private final static int TT_MAYBE = 1;
-
-        /** Tag type constant */
-        private final static int TT_TAG = 2;
-
-        /** Parse state */
-        private int parseState;
-
-        /** Parse substate */
-        private int parseSubState;
-
-        /** Previous parse state */
-        private int prevParseState;
-
-        /** Current tag type */
-        private int tagType;
-
-        /** Quote character */
-        private char quoteChar;
-
-        /** Did we already start parsing? */
-        boolean started = false;
-
-        private final org.xml.sax.helpers.AttributesImpl atts = new org.xml.sax.helpers.AttributesImpl();
-
-        /**
-         * Default constructor.
-         */
-        public HtmlGenerator() {
-            this.tagInclusionSet = DEFAULT_INCLUSION_TAGS;
+        public HtmlGenerator(final HtmlParser parser) {
+            this.htmlParser = parser;
+            this.writer = new StringWriter();
         }
 
         /**
-         * @see org.apache.sling.rewriter.Generator#init(org.apache.sling.rewriter.ProcessingContext, org.apache.sling.rewriter.ProcessingComponentConfiguration)
+         * @see org.apache.sling.rewriter.Generator#finished()
          */
-        public void init(ProcessingContext pipelineContext,
-                         ProcessingComponentConfiguration config) {
-            final String[] includedTags = OsgiUtil.toStringArray(config
-                    .getConfiguration().get(INCLUDE_TAGS_PROPERTY));
-            if (includedTags != null && includedTags.length > 0) {
-                this.tagInclusionSet = new HashSet<String>();
-                for (final String tag : includedTags) {
-                    this.tagInclusionSet.add(tag);
-                }
-                // we always have to include body!
-                this.tagInclusionSet.add("/BODY");
-            }
+        public void finished() throws IOException, SAXException {
+            this.htmlParser.parse(new ByteArrayInputStream(this.writer.toString().getBytes("UTF-8")), "UTF-8", this.contentHandler);
         }
 
         /**
          * @see org.apache.sling.rewriter.Generator#getWriter()
          */
         public PrintWriter getWriter() {
-            return new PrintWriter(this);
-        }
-
-        public Set<String> getTagInclusionSet() {
-            return tagInclusionSet;
+            return new PrintWriter(writer);
         }
 
-        public void setTagInclusionSet(Set<String> tagInclusionSet) {
-            this.tagInclusionSet = tagInclusionSet;
+        public void init(ProcessingContext context,
+                         ProcessingComponentConfiguration config)
+        throws IOException {
+            // nothing to do
         }
 
         /**
@@ -181,499 +89,6 @@
             this.contentHandler = handler;
         }
 
-        @Override
-        public void write(char cbuf[], int off, int len) throws IOException {
-            this.update(cbuf, 0, len);
-        }
-
-        @Override
-        public void write(int b) throws IOException {
-            final char[] buf = new char[] { (char) b };
-            this.update(buf, 0, buf.length);
-        }
-
-        @Override
-        public void close() throws IOException {
-            // nothing to do
-        }
-
-        @Override
-        public void flush() throws IOException {
-            flushBuffer();
-
-            // send 0-length characters that eventually let the serializer flush the
-            // underlying writer
-            try {
-                this.contentHandler.characters(new char[0], 0, 0);
-            } catch (SAXException e) {
-                throw handle(e);
-            }
-        }
-
-        /**
-         * Feed characters to the parser.
-         *
-         * @param buf
-         *            character buffer
-         * @param off
-         *            offset where characters start
-         * @param len
-         *            length of affected buffer
-         */
-        public void update(char[] buf, int off, int len) throws IOException {
-            if (!this.started) {
-                try {
-                    this.contentHandler.startDocument();
-                } catch (SAXException se) {
-                    this.handle(se);
-                }
-                this.started = true;
-            }
-            int start = off;
-            int end = off + len;
-
-            for (int curr = start; curr < end; curr++) {
-                char c = buf[curr];
-
-                switch (parseState) {
-                case PS_OUTSIDE:
-                    if (c == '<') {
-                        if (curr > start) {
-                            try {
-                                this.contentHandler.characters(buf, start, curr - start);
-                            } catch (SAXException e) {
-                                throw handle(e);
-                            }
-                        }
-                        start = curr;
-                        parseState = PS_TAG;
-                        parseSubState = 0;
-                        tagType = TT_MAYBE;
-                        resetTagName();
-                    }
-                    break;
-                case PS_TAG:
-                    switch (parseSubState) {
-                    case -1:
-                        if (c == '"' || c == '\'') {
-                            quoteChar = c;
-                            prevParseState = parseState;
-                            parseState = PS_STRING;
-                            parseSubState = -1;
-                        } else if (c == '>') {
-                            parseState = PS_OUTSIDE;
-                        }
-                        break;
-                    case 0:
-                        if (c == '!') {
-                            parseState = PS_COMMENT;
-                            parseSubState = 0;
-                            tagType = TT_NONE;
-                            flushBuffer();
-                        } else if (c == '"' || c == '\'') {
-                            quoteChar = c;
-                            prevParseState = parseState;
-                            parseState = PS_STRING;
-                            parseSubState = -1;
-                            tagType = TT_NONE;
-                            flushBuffer();
-                        } else if (c == '>') {
-                            parseState = PS_OUTSIDE;
-                            tagType = TT_NONE;
-                            flushBuffer();
-                        } else if (!Character.isWhitespace(c)) {
-                            tagNameBuffer.write(c);
-                            parseSubState = 1;
-                        } else {
-                            parseSubState = -1;
-                            tagType = TT_NONE;
-                            flushBuffer();
-                        }
-                        break;
-                    case 1:
-                        if (c == '"' || c == '\'') {
-                            if (tagIncluded(getTagName())) {
-                                tagType = TT_TAG;
-                            } else {
-                                tagType = TT_NONE;
-                                flushBuffer();
-                            }
-                            parseSubState = 2;
-                            quoteChar = c;
-                            prevParseState = parseState;
-                            parseState = PS_STRING;
-                        } else if (c == '>') {
-                            if (tagIncluded(getTagName())) {
-                                processTag(buf, start, curr - start + 1);
-                                start = curr + 1;
-                                tagType = TT_NONE;
-                                parseState = getTagName()
-                                        .equalsIgnoreCase("SCRIPT") ? PS_SCRIPT
-                                        : PS_OUTSIDE;
-                                parseSubState = 0;
-                            } else {
-                                tagType = TT_NONE;
-                                flushBuffer();
-                                parseState = PS_OUTSIDE;
-                            }
-                        } else if (Character.isWhitespace(c)) {
-                            if (tagIncluded(getTagName())) {
-                                tagType = TT_TAG;
-                            } else {
-                                tagType = TT_NONE;
-                                flushBuffer();
-                            }
-                            parseSubState = 2;
-                        } else {
-                            tagNameBuffer.write(c);
-                        }
-                        break;
-                    case 2:
-                        if (c == '"' || c == '\'') {
-                            quoteChar = c;
-                            prevParseState = parseState;
-                            parseState = PS_STRING;
-                        } else if (c == '>') {
-                            if (tagType == TT_TAG) {
-                                processTag(buf, start, curr - start + 1);
-                                start = curr + 1;
-                            } else {
-                                flushBuffer();
-                            }
-                            tagType = TT_NONE;
-                            parseState = getTagName().equalsIgnoreCase("SCRIPT") ? PS_SCRIPT
-                                    : PS_OUTSIDE;
-                            parseSubState = 0;
-                        }
-                        break;
-                    }
-                    break;
-                case PS_COMMENT:
-                    switch (parseSubState) {
-                    case 0:
-                        if (c == '-') {
-                            parseSubState++;
-                        } else if (c == '"' || c == '\'') {
-                            quoteChar = c;
-                            prevParseState = PS_TAG;
-                            parseState = PS_STRING;
-                            parseSubState = -1;
-                            tagType = TT_NONE;
-                            flushBuffer();
-                        } else if (c == '>') {
-                            parseState = PS_OUTSIDE;
-                            tagType = TT_NONE;
-                            flushBuffer();
-                        } else {
-                            parseState = PS_TAG;
-                            parseSubState = -1;
-                            tagType = TT_NONE;
-                            flushBuffer();
-                        }
-                        break;
-                    case 1:
-                        if (c == '-') {
-                            parseSubState++;
-                        } else if (c == '"' || c == '\'') {
-                            quoteChar = c;
-                            prevParseState = PS_TAG;
-                            parseState = PS_STRING;
-                            parseSubState = -1;
-                            tagType = TT_NONE;
-                            flushBuffer();
-                        } else if (c == '>') {
-                            parseState = PS_OUTSIDE;
-                            tagType = TT_NONE;
-                            flushBuffer();
-                        } else {
-                            parseState = PS_TAG;
-                            parseSubState = -1;
-                            tagType = TT_NONE;
-                            flushBuffer();
-                        }
-                        break;
-                    case 2:
-                        if (c == '-') {
-                            parseSubState++;
-                        }
-                        break;
-                    case 3:
-                        if (c == '-') {
-                            parseSubState++;
-                        } else {
-                            parseSubState = 2;
-                        }
-                        break;
-                    case 4:
-                        if (c == '>') {
-                            parseState = PS_OUTSIDE;
-                        } else {
-                            parseSubState = 2;
-                        }
-                        break;
-                    }
-                    break;
-
-                case PS_SCRIPT:
-                    switch (parseSubState) {
-                    case 0:
-                        if (c == '<') {
-                            if (curr > start) {
-                                try {
-                                    this.contentHandler.characters(buf, start, curr - start);
-                                } catch (SAXException e) {
-                                    throw handle(e);
-                                }
-                            }
-                            start = curr;
-                            tagType = TT_MAYBE;
-                            parseSubState++;
-                        }
-                        break;
-                    case 1:
-                        if (c == '/') {
-                            parseSubState++;
-                        } else {
-                            tagType = TT_NONE;
-                            flushBuffer();
-                            parseSubState = 0;
-                        }
-                        break;
-                    case 2:
-                        if (c == 'S' || c == 's') {
-                            parseSubState++;
-                        } else {
-                            tagType = TT_NONE;
-                            flushBuffer();
-                            parseSubState = 0;
-                        }
-                        break;
-                    case 3:
-                        if (c == 'C' || c == 'c') {
-                            parseSubState++;
-                        } else {
-                            tagType = TT_NONE;
-                            flushBuffer();
-                            parseSubState = 0;
-                        }
-                        break;
-                    case 4:
-                        if (c == 'R' || c == 'r') {
-                            parseSubState++;
-                        } else {
-                            tagType = TT_NONE;
-                            flushBuffer();
-                            parseSubState = 0;
-                        }
-                        break;
-                    case 5:
-                        if (c == 'I' || c == 'i') {
-                            parseSubState++;
-                        } else {
-                            tagType = TT_NONE;
-                            flushBuffer();
-                            parseSubState = 0;
-                        }
-                        break;
-                    case 6:
-                        if (c == 'P' || c == 'p') {
-                            parseSubState++;
-                        } else {
-                            tagType = TT_NONE;
-                            flushBuffer();
-                            parseSubState = 0;
-                        }
-                        break;
-                    case 7:
-                        if (c == 'T' || c == 't') {
-                            parseSubState++;
-                        } else {
-                            tagType = TT_NONE;
-                            flushBuffer();
-                            parseSubState = 0;
-                        }
-                        break;
-                    case 8:
-                        if (c == '>') {
-                            if (tagIncluded("SCRIPT")) {
-                                processTag(buf, start, curr - start + 1);
-                                start = curr + 1;
-                            } else {
-                                flushBuffer();
-                            }
-                            tagType = TT_NONE;
-                            parseState = PS_OUTSIDE;
-                        }
-                        break;
-                    }
-                    break;
-
-                case PS_STRING:
-                    if (c == quoteChar) {
-                        parseState = prevParseState;
-                    }
-                    break;
-                }
-            }
-            if (start < end) {
-                if (tagType == TT_NONE) {
-                    try {
-                        this.contentHandler.characters(buf, start, end - start);
-                    } catch (SAXException e) {
-                        throw handle(e);
-                    }
-                } else {
-                    buffer.write(buf, start, end - start);
-                }
-            }
-        }
-
-        /**
-         * Return a flag indicating whether the parser has still some undigested
-         * characters left.
-         *
-         * @return <code>true</code> if the parser still contains characters
-         *         <code>false</code> otherwise
-         */
-        public boolean isEmpty() {
-            return buffer.size() == 0;
-        }
-
-        /**
-         * Finish the parsing process. This forces the parser to flush the
-         * characters still held in its internal buffer, regardless of the parsing
-         * state.
-         */
-        public void finished() throws IOException {
-            flushBuffer();
-            if ( this.started ) {
-                try {
-                    this.contentHandler.endDocument();
-                } catch (SAXException e) {
-                    throw handle(e);
-                }
-
-            }
-        }
-
-        /**
-         * Clears the internal tagname buffer and cache
-         */
-        protected void resetTagName() {
-            tagName = null;
-            tagNameBuffer.reset();
-        }
-
-        /**
-         * Returns the tagname scanned and resets the internal tagname buffer
-         *
-         * @return tagname
-         */
-        protected String getTagName() {
-            if (tagName == null) {
-                tagName = tagNameBuffer.toString();
-            }
-            return tagName;
-        }
-
-        /**
-         * Flush internal buffer. This forces the parser to flush the characters
-         * still held in its internal buffer, regardless of the parsing state.
-         */
-        protected void flushBuffer() throws IOException {
-            if (buffer.size() > 0) {
-                char[] ch = buffer.toCharArray();
-                try {
-                    this.contentHandler.characters(ch, 0, ch.length);
-                } catch (SAXException e) {
-                    throw handle(e);
-                }
-                buffer.reset();
-            }
-        }
-
-        /**
-         * Returns a flag indicating whether the specified tag should be included in
-         * the parsing process.
-         *
-         * @param tagName
-         *            tag name
-         * @return <code>true</code> if the tag should be processed, else
-         *         <code>false</code>
-         */
-        protected boolean tagIncluded(String tagName) {
-            return tagInclusionSet == null
-                    || tagInclusionSet.contains(tagName.toUpperCase());
-        }
-
-        /**
-         * Decompose a tag and feed it to the document handler.
-         *
-         * @param ch
-         *            character data
-         * @param off
-         *            offset where character data starts
-         * @param len
-         *            length of character data
-         */
-        protected void processTag(char[] ch, int off, int len) throws IOException {
-            buffer.write(ch, off, len);
-
-            char[] snippet = buffer.toCharArray();
-
-            tokenizer.tokenize(snippet, 0, snippet.length);
-            if (!tokenizer.endTag()) {
-                final AttributeList attributes = tokenizer.attributes();
-                final String tagName = tokenizer.tagName();
-                this.atts.clear();
-
-                final char[] quotes = new char[attributes.attributeCount()];
-                int index = 0;
-                final Iterator<String> names = attributes.attributeNames();
-                while (names.hasNext()) {
-                    final String name = names.next();
-                    final String value = attributes.getValue(name);
-                    if (value != null) {
-                        this.atts.addAttribute("", name, name, "CDATA", value);
-                    } else {
-                        this.atts.addAttribute("", name, name, "CDATA", "");
-                    }
-                    quotes[index] = attributes.getQuoteChar(name);
-                    index++;
-                }
-                if ( index > 0 ) {
-                    this.atts.addAttribute(NAMESPACE, QUOTES_ATTR, QUOTES_ATTR, "CDATA", new String(quotes));
-                }
-                try {
-                    if (tokenizer.endSlash()) {
-                        // just tell the contentHandler via attribute that an end slash is needed
-                        this.atts.addAttribute("", END_SLASH_ATTR, END_SLASH_ATTR, "CDATA", "");
-                    }
-                    this.contentHandler.startElement("", tagName, tagName, this.atts);
-                } catch (SAXException e) {
-                    throw handle(e);
-                }
-            } else {
-                try {
-                    final String tagName = tokenizer.tagName();
-                    this.contentHandler.endElement("", tagName, tagName);
-                } catch (SAXException e) {
-                    throw handle(e);
-                }
-            }
-
-            buffer.reset();
-        }
-
-        protected final IOException handle(SAXException se) {
-            if ( se.getCause() != null && se.getCause() instanceof IOException) {
-                return (IOException)se.getCause();
-            }
-            final IOException ioe = new IOException("Unable to parse document");
-            ioe.initCause(se);
-            return ioe;
-        }
-
         /**
          * @see org.apache.sling.rewriter.Generator#dispose()
          */
@@ -681,4 +96,4 @@
             // nothing to do
         }
     }
-}
+}
\ No newline at end of file

Modified: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlSerializerFactory.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlSerializerFactory.java?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlSerializerFactory.java (original)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlSerializerFactory.java Tue Aug 18 14:30:34 2009
@@ -16,18 +16,8 @@
  */
 package org.apache.sling.rewriter.impl;
 
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.sling.rewriter.ProcessingComponentConfiguration;
-import org.apache.sling.rewriter.ProcessingContext;
 import org.apache.sling.rewriter.Serializer;
 import org.apache.sling.rewriter.SerializerFactory;
-import org.xml.sax.Attributes;
-import org.xml.sax.Locator;
-import org.xml.sax.SAXException;
 
 /**
  * This sax serializer serializes html-
@@ -37,185 +27,10 @@
  */
 public class HtmlSerializerFactory implements SerializerFactory {
 
-    private static final List<String> DEFAULT_EMPTY_TAGS;
-    static {
-        DEFAULT_EMPTY_TAGS = new ArrayList<String>();
-        DEFAULT_EMPTY_TAGS.add("br");
-        DEFAULT_EMPTY_TAGS.add("area");
-        DEFAULT_EMPTY_TAGS.add("link");
-        DEFAULT_EMPTY_TAGS.add("img");
-        DEFAULT_EMPTY_TAGS.add("param");
-        DEFAULT_EMPTY_TAGS.add("hr");
-        DEFAULT_EMPTY_TAGS.add("input");
-        DEFAULT_EMPTY_TAGS.add("col");
-        DEFAULT_EMPTY_TAGS.add("base");
-        DEFAULT_EMPTY_TAGS.add("meta");
-    }
-
     /**
      * @see org.apache.sling.rewriter.SerializerFactory#createSerializer()
      */
     public Serializer createSerializer() {
-        return new HtmlSerializer();
-    }
-
-    public class HtmlSerializer implements Serializer {
-
-        private PrintWriter delegatee;
-
-        private List<String> emptyTags;
-
-        /**
-         * @see org.apache.sling.rewriter.Serializer#init(org.apache.sling.rewriter.ProcessingContext, org.apache.sling.rewriter.ProcessingComponentConfiguration)
-         */
-        public void init(ProcessingContext pipelineContext, ProcessingComponentConfiguration config)
-        throws IOException {
-            final PrintWriter writer = pipelineContext.getWriter();
-            if (writer == null) {
-                throw new IllegalArgumentException("Writer must not be null");
-            }
-            this.delegatee = writer;
-            this.emptyTags = DEFAULT_EMPTY_TAGS;
-        }
-
-
-        /**
-         * @see org.xml.sax.ContentHandler#endDocument()
-         */
-        public void endDocument() throws SAXException {
-            this.delegatee.flush();
-        }
-
-        /**
-         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
-         */
-        public void startElement(String uri, String localName, String name,
-                Attributes atts) throws SAXException {
-            boolean endSlash = false;
-            this.delegatee.write('<');
-            this.delegatee.write(localName);
-            final String quotesString = atts.getValue(HtmlGeneratorFactory.NAMESPACE, HtmlGeneratorFactory.QUOTES_ATTR);
-            for(int i=0; i<atts.getLength(); i++) {
-                if (HtmlGeneratorFactory.END_SLASH_ATTR.equals(atts.getQName(i))) {
-                    endSlash = true;
-                } else if (!HtmlGeneratorFactory.NAMESPACE.equals(atts.getURI(i))) {
-                    this.delegatee.write(' ');
-                    this.delegatee.write(atts.getLocalName(i));
-                    final String value = atts.getValue(i);
-                    if ( value != null ) {
-                        this.delegatee.write('=');
-                        final char quoteChar;
-                        if ( quotesString != null && quotesString.length() > i ) {
-                            quoteChar = quotesString.charAt(i);
-                        } else {
-                            quoteChar = '\"';
-                        }
-                        this.delegatee.write(quoteChar);
-                        this.delegatee.write(value);
-                        this.delegatee.write(quoteChar);
-                    }
-                }
-            }
-
-            if (endSlash) {
-                // XHTML
-                this.delegatee.write("/");
-            }
-
-            this.delegatee.write(">");
-        }
-
-        /**
-         * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
-         */
-        public void endElement(String uri, String localName, String name)
-                throws SAXException {
-            if (!emptyTags.contains(localName)) {
-                this.delegatee.write("</");
-                this.delegatee.write(localName);
-                this.delegatee.write('>');
-            }
-        }
-
-
-        /**
-         * Called by HtmlParser if character data and tags are to be output for which no
-         * special handling is necessary.
-         *
-         * @param buffer Character data
-         * @param offset Offset where character data starts
-         * @param length The length of the character data
-         */
-        public void characters(char[] buffer, int offset, int length)
-        throws SAXException {
-            //this.checkStartElement(false);
-
-            // special hack for flush request, see bug #20068
-            if (length == 0) {
-                this.delegatee.flush();
-            } else {
-                this.delegatee.write(buffer, offset, length);
-            }
-        }
-
-        /**
-         * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
-         */
-        public void endPrefixMapping(String prefix) throws SAXException {
-            // not used atm
-        }
-
-        /**
-         * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
-         */
-        public void ignorableWhitespace(char[] ch, int start, int length)
-                throws SAXException {
-            // not used atm
-        }
-
-        /**
-         * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
-         */
-        public void processingInstruction(String target, String data)
-                throws SAXException {
-            // not used atm
-        }
-
-        /**
-         * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
-         */
-        public void setDocumentLocator(Locator locator) {
-            // not used atm
-        }
-
-        /**
-         * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
-         */
-        public void skippedEntity(String name) throws SAXException {
-            // not used atm
-        }
-
-        /**
-         * @see org.xml.sax.ContentHandler#startDocument()
-         */
-        public void startDocument() throws SAXException {
-            // not used atm
-        }
-
-
-        /**
-         * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
-         */
-        public void startPrefixMapping(String prefix, String uri)
-                throws SAXException {
-            // not used atm
-        }
-
-        /**
-         * @see org.apache.sling.rewriter.Serializer#dispose()
-         */
-        public void dispose() {
-            // nothing to do
-        }
+        return new HTMLSerializer();
     }
 }

Modified: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessingComponentConfigurationImpl.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessingComponentConfigurationImpl.java?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessingComponentConfigurationImpl.java (original)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessingComponentConfigurationImpl.java Tue Aug 18 14:30:34 2009
@@ -19,6 +19,8 @@
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.sling.api.resource.ValueMap;
+import org.apache.sling.api.wrappers.ValueMapDecorator;
 import org.apache.sling.rewriter.ProcessingComponentConfiguration;
 
 /**
@@ -27,7 +29,7 @@
 public class ProcessingComponentConfigurationImpl implements ProcessingComponentConfiguration {
 
     /** Empty configuration map. */
-    public static final Map<String, Object> EMPTY_CONFIG = new HashMap<String, Object>();
+    public static final ValueMap EMPTY_CONFIG = new ValueMapDecorator(new HashMap<String, Object>());
 
     /** Empty configuration. */
     public static final ProcessingComponentConfiguration EMPTY = new ProcessingComponentConfigurationImpl("<empty>", null);
@@ -36,7 +38,7 @@
     private final String type;
 
     /** The configuration map. */
-    private final Map<String, Object> configuration;
+    private final ValueMap configuration;
 
     /**
      * Create a new configuration.
@@ -45,13 +47,13 @@
      */
     public ProcessingComponentConfigurationImpl(final String type, final Map<String, Object> config) {
         this.type = type;
-        this.configuration = (config == null ? EMPTY_CONFIG : config);
+        this.configuration = (config == null ? EMPTY_CONFIG : new ValueMapDecorator(config));
     }
 
     /**
      * @see org.apache.sling.rewriter.ProcessingComponentConfiguration#getConfiguration()
      */
-    public Map<String, Object> getConfiguration() {
+    public ValueMap getConfiguration() {
         return this.configuration;
     }
 

Modified: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessorManagerImpl.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessorManagerImpl.java?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessorManagerImpl.java (original)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessorManagerImpl.java Tue Aug 18 14:30:34 2009
@@ -150,7 +150,7 @@
         this.addProcessor("*", "", new ProcessorConfigurationImpl(
                 new String[] {MIME_TYPE_HTML}, // content types
                 null, // paths,
-                null, // extension
+                new String[] {"html"}, // extension
                 -1,   // order
                 new ProcessingComponentConfigurationImpl("html-generator", null), // generator config
                 null, // transformer config

Added: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java?rev=805427&view=auto
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java (added)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java Tue Aug 18 14:30:34 2009
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.rewriter.impl;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.sling.rewriter.ProcessingComponentConfiguration;
+import org.apache.sling.rewriter.ProcessingContext;
+import org.apache.sling.rewriter.Serializer;
+
+/**
+ * <p>A pedantic XHTML serializer encoding all recognized entities with their
+ * proper HTML names.</p>
+ *
+ * <p>For configuration options of this serializer, please look at the
+ * {@link org.apache.cocoon.components.serializers.util.EncodingSerializer}.
+ * In addition to those, this serializer also supports the specification of a
+ * default doctype through the <code>doctype-default</code> configuration
+ * property. This default will be used if no document type is received
+ * in the SAX events.</p>
+ *
+ * <p>The value of <code>doctype-default</code> can be one of:</p>
+ *
+ * <dl>
+ *   <dt>"<code>none</code>"</dt>
+ *   <dd>Not to emit any document type declaration.</dd>
+ *   <dt>"<code>strict</code>"</dt>
+ *   <dd>The XHTML 1.0 Strict document type.</dd>
+ *   <dt>"<code>loose</code>"</dt>
+ *   <dd>The XHTML 1.0 Transitional document type.</dd>
+ *   <dt>"<code>frameset</code>"</dt>
+ *   <dd>The XHTML 1.0 Frameset document type.</dd>
+ * </dl>
+ */
+public class XHTMLSerializer
+    extends org.apache.cocoon.components.serializers.util.XHTMLSerializer
+    implements Serializer {
+
+    /**
+     * Configures this serializer from the component configuration and hands
+     * the request and the output stream of the processing context to the
+     * underlying serializer.
+     *
+     * <p>Configuration properties read: <code>encoding</code> (default
+     * <code>UTF-8</code>), <code>indent</code> (default <code>0</code>),
+     * <code>omit-xml-declaration</code> (default <code>no</code>) and
+     * <code>doctype-default</code> (no default).</p>
+     *
+     * @param context the processing context supplying the request and the output stream
+     * @param config the configuration of this serializer
+     * @throws IOException if the configured encoding is not supported; the
+     *         original {@link UnsupportedEncodingException} is attached as its cause
+     * @see org.apache.sling.rewriter.Serializer#init(org.apache.sling.rewriter.ProcessingContext, org.apache.sling.rewriter.ProcessingComponentConfiguration)
+     */
+    public void init(ProcessingContext context,
+                     ProcessingComponentConfiguration config)
+    throws IOException {
+        final String encoding = config.getConfiguration().get("encoding", "UTF-8");
+        try {
+            this.setEncoding(encoding);
+        } catch (UnsupportedEncodingException exception) {
+            // Attach the original exception as the cause so the failure stays diagnosable.
+            final IOException ioe = new IOException("Encoding not supported: " + encoding);
+            ioe.initCause(exception);
+            throw ioe;
+        }
+        setIndentPerLevel(config.getConfiguration().get("indent", 0));
+        setOmitXmlDeclaration(config.getConfiguration().get("omit-xml-declaration", "no"));
+        setDoctypeDefault(config.getConfiguration().get("doctype-default", String.class));
+
+        this.setup(context.getRequest());
+        this.setOutputStream(context.getOutputStream());
+    }
+
+    /**
+     * @see org.apache.sling.rewriter.Serializer#dispose()
+     */
+    public void dispose() {
+        // nothing to do
+    }
+}

Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java
------------------------------------------------------------------------------
    svn:keywords = author date id revision rev url

Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain