You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by cz...@apache.org on 2009/08/18 16:30:35 UTC
svn commit: r805427 - in /sling/trunk/contrib/extensions/rewriter: ./
src/main/java/org/apache/ src/main/java/org/apache/sling/rewriter/
src/main/java/org/apache/sling/rewriter/impl/
Author: cziegeler
Date: Tue Aug 18 14:30:34 2009
New Revision: 805427
URL: http://svn.apache.org/viewvc?rev=805427&view=rev
Log:
Use new html parser and new serializers which pass the complete html as sax events through the pipeline.
Added:
sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java (with props)
sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java (with props)
Removed:
sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/AttributeList.java
sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/TagTokenizer.java
Modified:
sling/trunk/contrib/extensions/rewriter/pom.xml
sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/ (props changed)
sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/ProcessingComponentConfiguration.java
sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlGeneratorFactory.java
sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlSerializerFactory.java
sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessingComponentConfigurationImpl.java
sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessorManagerImpl.java
Modified: sling/trunk/contrib/extensions/rewriter/pom.xml
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/pom.xml?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/pom.xml (original)
+++ sling/trunk/contrib/extensions/rewriter/pom.xml Tue Aug 18 14:30:34 2009
@@ -64,7 +64,9 @@
org.apache.sling.rewriter
</Export-Package>
<Private-Package>
- org.apache.sling.rewriter.impl
+ org.apache.sling.rewriter.impl,
+ org.apache.cocoon.components.serializers.encoding,
+ org.apache.cocoon.components.serializers.util
</Private-Package>
</instructions>
</configuration>
@@ -74,6 +76,12 @@
<dependencies>
<dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ <version>2.4</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
<groupId>org.apache.sling</groupId>
<artifactId>org.apache.sling.api</artifactId>
<version>2.0.6</version>
@@ -81,6 +89,12 @@
</dependency>
<dependency>
<groupId>org.apache.sling</groupId>
+ <artifactId>org.apache.sling.commons.html</artifactId>
+ <version>0.9.0-SNAPSHOT</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.sling</groupId>
<artifactId>org.apache.sling.jcr.resource</artifactId>
<version>2.0.2-incubator</version>
<scope>provided</scope>
Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/
------------------------------------------------------------------------------
--- svn:externals (added)
+++ svn:externals Tue Aug 18 14:30:34 2009
@@ -0,0 +1 @@
+cocoon https://svn.apache.org/repos/asf/cocoon/trunk/blocks/cocoon-serializers/cocoon-serializers-charsets/src/main/java/org/apache/cocoon
Modified: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/ProcessingComponentConfiguration.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/ProcessingComponentConfiguration.java?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/ProcessingComponentConfiguration.java (original)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/ProcessingComponentConfiguration.java Tue Aug 18 14:30:34 2009
@@ -16,7 +16,7 @@
*/
package org.apache.sling.rewriter;
-import java.util.Map;
+import org.apache.sling.api.resource.ValueMap;
/**
@@ -34,5 +34,5 @@
* Return the configuration for this component.
* @return The configuration for this component or an empty map if there is none.
*/
- Map<String, Object> getConfiguration();
+ ValueMap getConfiguration();
}
Added: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java?rev=805427&view=auto
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java (added)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java Tue Aug 18 14:30:34 2009
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.rewriter.impl;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.sling.rewriter.ProcessingComponentConfiguration;
+import org.apache.sling.rewriter.ProcessingContext;
+import org.apache.sling.rewriter.Serializer;
+
+/**
+ * <p>A serializer converting XHTML into plain old HTML.</p>
+ *
+ * <p>For configuration options of this serializer, please look at the
+ * {@link XHTMLSerializer} and
+ * {@link org.apache.cocoon.components.serializers.util.EncodingSerializer}.</p>
+ *
+ * <p>Any XHTML document type declared or used will be converted into
+ * its HTML 4.01 counterpart. In addition, a "compatible" doctype
+ * is supported to exploit a couple of shortcuts in MSIE's rendering
+ * engine. The values for <code>doctype-default</code> can then be:</p>
+ *
+ * <dl>
+ * <dt>"<code>none</code>"</dt>
+ * <dd>Do not emit any document type declaration.</dd>
+ * <dt>"<code>compatible</code>"</dt>
+ * <dd>The HTML 4.01 Transitional (exploiting MSIE shortcut).</dd>
+ * <dt>"<code>strict</code>"</dt>
+ * <dd>The HTML 4.01 Strict document type.</dd>
+ * <dt>"<code>loose</code>"</dt>
+ * <dd>The HTML 4.01 Transitional document type.</dd>
+ * <dt>"<code>frameset</code>"</dt>
+ * <dd>The HTML 4.01 Frameset document type.</dd>
+ * </dl>
+ *
+ */
+public class HTMLSerializer
+ extends org.apache.cocoon.components.serializers.util.HTMLSerializer
+ implements Serializer {
+
+ /**
+ * @see org.apache.sling.rewriter.Serializer#init(org.apache.sling.rewriter.ProcessingContext, org.apache.sling.rewriter.ProcessingComponentConfiguration)
+ */
+ public void init(ProcessingContext context,
+ ProcessingComponentConfiguration config)
+ throws IOException {
+ String encoding = config.getConfiguration().get("encoding", "UTF-8");
+ try {
+ this.setEncoding(encoding);
+ } catch (UnsupportedEncodingException exception) {
+ throw new IOException("Encoding not supported: " + encoding);
+ }
+
+ this.setIndentPerLevel(config.getConfiguration().get("indent", 0));
+ this.setDoctypeDefault(config.getConfiguration().get("doctype-default", String.class));
+
+ this.setup(context.getRequest());
+ this.setOutputStream(context.getOutputStream());
+ }
+
+ /**
+ * @see org.apache.sling.rewriter.Serializer#dispose()
+ */
+ public void dispose() {
+ // nothing to do
+ }
+}
Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java
------------------------------------------------------------------------------
svn:keywords = author date id revision rev url
Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HTMLSerializer.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlGeneratorFactory.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlGeneratorFactory.java?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlGeneratorFactory.java (original)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlGeneratorFactory.java Tue Aug 18 14:30:34 2009
@@ -16,15 +16,12 @@
*/
package org.apache.sling.rewriter.impl;
-import java.io.CharArrayWriter;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.PrintWriter;
-import java.io.Writer;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Set;
+import java.io.StringWriter;
-import org.apache.sling.commons.osgi.OsgiUtil;
+import org.apache.sling.commons.html.HtmlParser;
import org.apache.sling.rewriter.Generator;
import org.apache.sling.rewriter.GeneratorFactory;
import org.apache.sling.rewriter.ProcessingComponentConfiguration;
@@ -42,136 +39,47 @@
*/
public class HtmlGeneratorFactory implements GeneratorFactory {
- public static String NAMESPACE = "http://org.apache.sling/rewriter";
-
- public static String END_SLASH_ATTR = "endSlash";
-
- public static String QUOTES_ATTR = "quotes";
-
- public static final String INCLUDE_TAGS_PROPERTY = "includeTags";
-
- private static final Set<String> DEFAULT_INCLUSION_TAGS;
- static {
- DEFAULT_INCLUSION_TAGS = new HashSet<String>();
- DEFAULT_INCLUSION_TAGS.add("A");
- DEFAULT_INCLUSION_TAGS.add("/A");
- DEFAULT_INCLUSION_TAGS.add("IMG");
- DEFAULT_INCLUSION_TAGS.add("AREA");
- DEFAULT_INCLUSION_TAGS.add("FORM");
- DEFAULT_INCLUSION_TAGS.add("BASE");
- DEFAULT_INCLUSION_TAGS.add("LINK");
- DEFAULT_INCLUSION_TAGS.add("SCRIPT");
- DEFAULT_INCLUSION_TAGS.add("/BODY");
- }
+ /** @scr.reference */
+ private HtmlParser htmlParser;
/**
* @see org.apache.sling.rewriter.GeneratorFactory#createGenerator()
*/
public Generator createGenerator() {
- return new HtmlGenerator();
+ return new HtmlGenerator(htmlParser);
}
- public static final class HtmlGenerator extends Writer implements Generator {
+ public static final class HtmlGenerator implements Generator {
- /** Internal character buffer */
- private final CharArrayWriter buffer = new CharArrayWriter(256);
+ private final StringWriter writer;
- /** Tag tokenizer */
- private final TagTokenizer tokenizer = new TagTokenizer();
+ private final HtmlParser htmlParser;
- /** Tag name buffer */
- private final CharArrayWriter tagNameBuffer = new CharArrayWriter(30);
-
- /** Tag name */
- private String tagName;
-
- /** Tag inclusion list */
- private Set<String> tagInclusionSet;
-
- /** Registered content handler */
private ContentHandler contentHandler;
- /** Parse state constant */
- private final static int PS_OUTSIDE = 0;
-
- /** Parse state constant */
- private final static int PS_TAG = PS_OUTSIDE + 1;
-
- /** Parse state constant */
- private final static int PS_SCRIPT = PS_TAG + 1;
-
- /** Parse state constant */
- private final static int PS_COMMENT = PS_SCRIPT + 1;
-
- /** Parse state constant */
- private final static int PS_STRING = PS_COMMENT + 1;
-
- /** Tag type constant */
- private final static int TT_NONE = 0;
-
- /** Tag type constant */
- private final static int TT_MAYBE = 1;
-
- /** Tag type constant */
- private final static int TT_TAG = 2;
-
- /** Parse state */
- private int parseState;
-
- /** Parse substate */
- private int parseSubState;
-
- /** Previous parse state */
- private int prevParseState;
-
- /** Current tag type */
- private int tagType;
-
- /** Quote character */
- private char quoteChar;
-
- /** Did we already start parsing? */
- boolean started = false;
-
- private final org.xml.sax.helpers.AttributesImpl atts = new org.xml.sax.helpers.AttributesImpl();
-
- /**
- * Default constructor.
- */
- public HtmlGenerator() {
- this.tagInclusionSet = DEFAULT_INCLUSION_TAGS;
+ public HtmlGenerator(final HtmlParser parser) {
+ this.htmlParser = parser;
+ this.writer = new StringWriter();
}
/**
- * @see org.apache.sling.rewriter.Generator#init(org.apache.sling.rewriter.ProcessingContext, org.apache.sling.rewriter.ProcessingComponentConfiguration)
+ * @see org.apache.sling.rewriter.Generator#finished()
*/
- public void init(ProcessingContext pipelineContext,
- ProcessingComponentConfiguration config) {
- final String[] includedTags = OsgiUtil.toStringArray(config
- .getConfiguration().get(INCLUDE_TAGS_PROPERTY));
- if (includedTags != null && includedTags.length > 0) {
- this.tagInclusionSet = new HashSet<String>();
- for (final String tag : includedTags) {
- this.tagInclusionSet.add(tag);
- }
- // we always have to include body!
- this.tagInclusionSet.add("/BODY");
- }
+ public void finished() throws IOException, SAXException {
+ this.htmlParser.parse(new ByteArrayInputStream(this.writer.toString().getBytes("UTF-8")), "UTF-8", this.contentHandler);
}
/**
* @see org.apache.sling.rewriter.Generator#getWriter()
*/
public PrintWriter getWriter() {
- return new PrintWriter(this);
- }
-
- public Set<String> getTagInclusionSet() {
- return tagInclusionSet;
+ return new PrintWriter(writer);
}
- public void setTagInclusionSet(Set<String> tagInclusionSet) {
- this.tagInclusionSet = tagInclusionSet;
+ public void init(ProcessingContext context,
+ ProcessingComponentConfiguration config)
+ throws IOException {
+ // nothing to do
}
/**
@@ -181,499 +89,6 @@
this.contentHandler = handler;
}
- @Override
- public void write(char cbuf[], int off, int len) throws IOException {
- this.update(cbuf, 0, len);
- }
-
- @Override
- public void write(int b) throws IOException {
- final char[] buf = new char[] { (char) b };
- this.update(buf, 0, buf.length);
- }
-
- @Override
- public void close() throws IOException {
- // nothing to do
- }
-
- @Override
- public void flush() throws IOException {
- flushBuffer();
-
- // send 0-length characters that eventually let the serializer flush the
- // underlying writer
- try {
- this.contentHandler.characters(new char[0], 0, 0);
- } catch (SAXException e) {
- throw handle(e);
- }
- }
-
- /**
- * Feed characters to the parser.
- *
- * @param buf
- * character buffer
- * @param off
- * offset where characters start
- * @param len
- * length of affected buffer
- */
- public void update(char[] buf, int off, int len) throws IOException {
- if (!this.started) {
- try {
- this.contentHandler.startDocument();
- } catch (SAXException se) {
- this.handle(se);
- }
- this.started = true;
- }
- int start = off;
- int end = off + len;
-
- for (int curr = start; curr < end; curr++) {
- char c = buf[curr];
-
- switch (parseState) {
- case PS_OUTSIDE:
- if (c == '<') {
- if (curr > start) {
- try {
- this.contentHandler.characters(buf, start, curr - start);
- } catch (SAXException e) {
- throw handle(e);
- }
- }
- start = curr;
- parseState = PS_TAG;
- parseSubState = 0;
- tagType = TT_MAYBE;
- resetTagName();
- }
- break;
- case PS_TAG:
- switch (parseSubState) {
- case -1:
- if (c == '"' || c == '\'') {
- quoteChar = c;
- prevParseState = parseState;
- parseState = PS_STRING;
- parseSubState = -1;
- } else if (c == '>') {
- parseState = PS_OUTSIDE;
- }
- break;
- case 0:
- if (c == '!') {
- parseState = PS_COMMENT;
- parseSubState = 0;
- tagType = TT_NONE;
- flushBuffer();
- } else if (c == '"' || c == '\'') {
- quoteChar = c;
- prevParseState = parseState;
- parseState = PS_STRING;
- parseSubState = -1;
- tagType = TT_NONE;
- flushBuffer();
- } else if (c == '>') {
- parseState = PS_OUTSIDE;
- tagType = TT_NONE;
- flushBuffer();
- } else if (!Character.isWhitespace(c)) {
- tagNameBuffer.write(c);
- parseSubState = 1;
- } else {
- parseSubState = -1;
- tagType = TT_NONE;
- flushBuffer();
- }
- break;
- case 1:
- if (c == '"' || c == '\'') {
- if (tagIncluded(getTagName())) {
- tagType = TT_TAG;
- } else {
- tagType = TT_NONE;
- flushBuffer();
- }
- parseSubState = 2;
- quoteChar = c;
- prevParseState = parseState;
- parseState = PS_STRING;
- } else if (c == '>') {
- if (tagIncluded(getTagName())) {
- processTag(buf, start, curr - start + 1);
- start = curr + 1;
- tagType = TT_NONE;
- parseState = getTagName()
- .equalsIgnoreCase("SCRIPT") ? PS_SCRIPT
- : PS_OUTSIDE;
- parseSubState = 0;
- } else {
- tagType = TT_NONE;
- flushBuffer();
- parseState = PS_OUTSIDE;
- }
- } else if (Character.isWhitespace(c)) {
- if (tagIncluded(getTagName())) {
- tagType = TT_TAG;
- } else {
- tagType = TT_NONE;
- flushBuffer();
- }
- parseSubState = 2;
- } else {
- tagNameBuffer.write(c);
- }
- break;
- case 2:
- if (c == '"' || c == '\'') {
- quoteChar = c;
- prevParseState = parseState;
- parseState = PS_STRING;
- } else if (c == '>') {
- if (tagType == TT_TAG) {
- processTag(buf, start, curr - start + 1);
- start = curr + 1;
- } else {
- flushBuffer();
- }
- tagType = TT_NONE;
- parseState = getTagName().equalsIgnoreCase("SCRIPT") ? PS_SCRIPT
- : PS_OUTSIDE;
- parseSubState = 0;
- }
- break;
- }
- break;
- case PS_COMMENT:
- switch (parseSubState) {
- case 0:
- if (c == '-') {
- parseSubState++;
- } else if (c == '"' || c == '\'') {
- quoteChar = c;
- prevParseState = PS_TAG;
- parseState = PS_STRING;
- parseSubState = -1;
- tagType = TT_NONE;
- flushBuffer();
- } else if (c == '>') {
- parseState = PS_OUTSIDE;
- tagType = TT_NONE;
- flushBuffer();
- } else {
- parseState = PS_TAG;
- parseSubState = -1;
- tagType = TT_NONE;
- flushBuffer();
- }
- break;
- case 1:
- if (c == '-') {
- parseSubState++;
- } else if (c == '"' || c == '\'') {
- quoteChar = c;
- prevParseState = PS_TAG;
- parseState = PS_STRING;
- parseSubState = -1;
- tagType = TT_NONE;
- flushBuffer();
- } else if (c == '>') {
- parseState = PS_OUTSIDE;
- tagType = TT_NONE;
- flushBuffer();
- } else {
- parseState = PS_TAG;
- parseSubState = -1;
- tagType = TT_NONE;
- flushBuffer();
- }
- break;
- case 2:
- if (c == '-') {
- parseSubState++;
- }
- break;
- case 3:
- if (c == '-') {
- parseSubState++;
- } else {
- parseSubState = 2;
- }
- break;
- case 4:
- if (c == '>') {
- parseState = PS_OUTSIDE;
- } else {
- parseSubState = 2;
- }
- break;
- }
- break;
-
- case PS_SCRIPT:
- switch (parseSubState) {
- case 0:
- if (c == '<') {
- if (curr > start) {
- try {
- this.contentHandler.characters(buf, start, curr - start);
- } catch (SAXException e) {
- throw handle(e);
- }
- }
- start = curr;
- tagType = TT_MAYBE;
- parseSubState++;
- }
- break;
- case 1:
- if (c == '/') {
- parseSubState++;
- } else {
- tagType = TT_NONE;
- flushBuffer();
- parseSubState = 0;
- }
- break;
- case 2:
- if (c == 'S' || c == 's') {
- parseSubState++;
- } else {
- tagType = TT_NONE;
- flushBuffer();
- parseSubState = 0;
- }
- break;
- case 3:
- if (c == 'C' || c == 'c') {
- parseSubState++;
- } else {
- tagType = TT_NONE;
- flushBuffer();
- parseSubState = 0;
- }
- break;
- case 4:
- if (c == 'R' || c == 'r') {
- parseSubState++;
- } else {
- tagType = TT_NONE;
- flushBuffer();
- parseSubState = 0;
- }
- break;
- case 5:
- if (c == 'I' || c == 'i') {
- parseSubState++;
- } else {
- tagType = TT_NONE;
- flushBuffer();
- parseSubState = 0;
- }
- break;
- case 6:
- if (c == 'P' || c == 'p') {
- parseSubState++;
- } else {
- tagType = TT_NONE;
- flushBuffer();
- parseSubState = 0;
- }
- break;
- case 7:
- if (c == 'T' || c == 't') {
- parseSubState++;
- } else {
- tagType = TT_NONE;
- flushBuffer();
- parseSubState = 0;
- }
- break;
- case 8:
- if (c == '>') {
- if (tagIncluded("SCRIPT")) {
- processTag(buf, start, curr - start + 1);
- start = curr + 1;
- } else {
- flushBuffer();
- }
- tagType = TT_NONE;
- parseState = PS_OUTSIDE;
- }
- break;
- }
- break;
-
- case PS_STRING:
- if (c == quoteChar) {
- parseState = prevParseState;
- }
- break;
- }
- }
- if (start < end) {
- if (tagType == TT_NONE) {
- try {
- this.contentHandler.characters(buf, start, end - start);
- } catch (SAXException e) {
- throw handle(e);
- }
- } else {
- buffer.write(buf, start, end - start);
- }
- }
- }
-
- /**
- * Return a flag indicating whether the parser has still some undigested
- * characters left.
- *
- * @return <code>true</code> if the parser still contains characters
- * <code>false</code> otherwise
- */
- public boolean isEmpty() {
- return buffer.size() == 0;
- }
-
- /**
- * Finish the parsing process. This forces the parser to flush the
- * characters still held in its internal buffer, regardless of the parsing
- * state.
- */
- public void finished() throws IOException {
- flushBuffer();
- if ( this.started ) {
- try {
- this.contentHandler.endDocument();
- } catch (SAXException e) {
- throw handle(e);
- }
-
- }
- }
-
- /**
- * Clears the internal tagname buffer and cache
- */
- protected void resetTagName() {
- tagName = null;
- tagNameBuffer.reset();
- }
-
- /**
- * Returns the tagname scanned and resets the internal tagname buffer
- *
- * @return tagname
- */
- protected String getTagName() {
- if (tagName == null) {
- tagName = tagNameBuffer.toString();
- }
- return tagName;
- }
-
- /**
- * Flush internal buffer. This forces the parser to flush the characters
- * still held in its internal buffer, regardless of the parsing state.
- */
- protected void flushBuffer() throws IOException {
- if (buffer.size() > 0) {
- char[] ch = buffer.toCharArray();
- try {
- this.contentHandler.characters(ch, 0, ch.length);
- } catch (SAXException e) {
- throw handle(e);
- }
- buffer.reset();
- }
- }
-
- /**
- * Returns a flag indicating whether the specified tag should be included in
- * the parsing process.
- *
- * @param tagName
- * tag name
- * @return <code>true</code> if the tag should be processed, else
- * <code>false</code>
- */
- protected boolean tagIncluded(String tagName) {
- return tagInclusionSet == null
- || tagInclusionSet.contains(tagName.toUpperCase());
- }
-
- /**
- * Decompose a tag and feed it to the document handler.
- *
- * @param ch
- * character data
- * @param off
- * offset where character data starts
- * @param len
- * length of character data
- */
- protected void processTag(char[] ch, int off, int len) throws IOException {
- buffer.write(ch, off, len);
-
- char[] snippet = buffer.toCharArray();
-
- tokenizer.tokenize(snippet, 0, snippet.length);
- if (!tokenizer.endTag()) {
- final AttributeList attributes = tokenizer.attributes();
- final String tagName = tokenizer.tagName();
- this.atts.clear();
-
- final char[] quotes = new char[attributes.attributeCount()];
- int index = 0;
- final Iterator<String> names = attributes.attributeNames();
- while (names.hasNext()) {
- final String name = names.next();
- final String value = attributes.getValue(name);
- if (value != null) {
- this.atts.addAttribute("", name, name, "CDATA", value);
- } else {
- this.atts.addAttribute("", name, name, "CDATA", "");
- }
- quotes[index] = attributes.getQuoteChar(name);
- index++;
- }
- if ( index > 0 ) {
- this.atts.addAttribute(NAMESPACE, QUOTES_ATTR, QUOTES_ATTR, "CDATA", new String(quotes));
- }
- try {
- if (tokenizer.endSlash()) {
- // just tell the contentHandler via attribute that an end slash is needed
- this.atts.addAttribute("", END_SLASH_ATTR, END_SLASH_ATTR, "CDATA", "");
- }
- this.contentHandler.startElement("", tagName, tagName, this.atts);
- } catch (SAXException e) {
- throw handle(e);
- }
- } else {
- try {
- final String tagName = tokenizer.tagName();
- this.contentHandler.endElement("", tagName, tagName);
- } catch (SAXException e) {
- throw handle(e);
- }
- }
-
- buffer.reset();
- }
-
- protected final IOException handle(SAXException se) {
- if ( se.getCause() != null && se.getCause() instanceof IOException) {
- return (IOException)se.getCause();
- }
- final IOException ioe = new IOException("Unable to parse document");
- ioe.initCause(se);
- return ioe;
- }
-
/**
* @see org.apache.sling.rewriter.Generator#dispose()
*/
@@ -681,4 +96,4 @@
// nothing to do
}
}
-}
+}
\ No newline at end of file
Modified: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlSerializerFactory.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlSerializerFactory.java?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlSerializerFactory.java (original)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/HtmlSerializerFactory.java Tue Aug 18 14:30:34 2009
@@ -16,18 +16,8 @@
*/
package org.apache.sling.rewriter.impl;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.sling.rewriter.ProcessingComponentConfiguration;
-import org.apache.sling.rewriter.ProcessingContext;
import org.apache.sling.rewriter.Serializer;
import org.apache.sling.rewriter.SerializerFactory;
-import org.xml.sax.Attributes;
-import org.xml.sax.Locator;
-import org.xml.sax.SAXException;
/**
* This sax serializer serializes html-
@@ -37,185 +27,10 @@
*/
public class HtmlSerializerFactory implements SerializerFactory {
- private static final List<String> DEFAULT_EMPTY_TAGS;
- static {
- DEFAULT_EMPTY_TAGS = new ArrayList<String>();
- DEFAULT_EMPTY_TAGS.add("br");
- DEFAULT_EMPTY_TAGS.add("area");
- DEFAULT_EMPTY_TAGS.add("link");
- DEFAULT_EMPTY_TAGS.add("img");
- DEFAULT_EMPTY_TAGS.add("param");
- DEFAULT_EMPTY_TAGS.add("hr");
- DEFAULT_EMPTY_TAGS.add("input");
- DEFAULT_EMPTY_TAGS.add("col");
- DEFAULT_EMPTY_TAGS.add("base");
- DEFAULT_EMPTY_TAGS.add("meta");
- }
-
/**
* @see org.apache.sling.rewriter.SerializerFactory#createSerializer()
*/
public Serializer createSerializer() {
- return new HtmlSerializer();
- }
-
- public class HtmlSerializer implements Serializer {
-
- private PrintWriter delegatee;
-
- private List<String> emptyTags;
-
- /**
- * @see org.apache.sling.rewriter.Serializer#init(org.apache.sling.rewriter.ProcessingContext, org.apache.sling.rewriter.ProcessingComponentConfiguration)
- */
- public void init(ProcessingContext pipelineContext, ProcessingComponentConfiguration config)
- throws IOException {
- final PrintWriter writer = pipelineContext.getWriter();
- if (writer == null) {
- throw new IllegalArgumentException("Writer must not be null");
- }
- this.delegatee = writer;
- this.emptyTags = DEFAULT_EMPTY_TAGS;
- }
-
-
- /**
- * @see org.xml.sax.ContentHandler#endDocument()
- */
- public void endDocument() throws SAXException {
- this.delegatee.flush();
- }
-
- /**
- * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
- */
- public void startElement(String uri, String localName, String name,
- Attributes atts) throws SAXException {
- boolean endSlash = false;
- this.delegatee.write('<');
- this.delegatee.write(localName);
- final String quotesString = atts.getValue(HtmlGeneratorFactory.NAMESPACE, HtmlGeneratorFactory.QUOTES_ATTR);
- for(int i=0; i<atts.getLength(); i++) {
- if (HtmlGeneratorFactory.END_SLASH_ATTR.equals(atts.getQName(i))) {
- endSlash = true;
- } else if (!HtmlGeneratorFactory.NAMESPACE.equals(atts.getURI(i))) {
- this.delegatee.write(' ');
- this.delegatee.write(atts.getLocalName(i));
- final String value = atts.getValue(i);
- if ( value != null ) {
- this.delegatee.write('=');
- final char quoteChar;
- if ( quotesString != null && quotesString.length() > i ) {
- quoteChar = quotesString.charAt(i);
- } else {
- quoteChar = '\"';
- }
- this.delegatee.write(quoteChar);
- this.delegatee.write(value);
- this.delegatee.write(quoteChar);
- }
- }
- }
-
- if (endSlash) {
- // XHTML
- this.delegatee.write("/");
- }
-
- this.delegatee.write(">");
- }
-
- /**
- * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
- */
- public void endElement(String uri, String localName, String name)
- throws SAXException {
- if (!emptyTags.contains(localName)) {
- this.delegatee.write("</");
- this.delegatee.write(localName);
- this.delegatee.write('>');
- }
- }
-
-
- /**
- * Called by HtmlParser if character data and tags are to be output for which no
- * special handling is necessary.
- *
- * @param buffer Character data
- * @param offset Offset where character data starts
- * @param length The length of the character data
- */
- public void characters(char[] buffer, int offset, int length)
- throws SAXException {
- //this.checkStartElement(false);
-
- // special hack for flush request, see bug #20068
- if (length == 0) {
- this.delegatee.flush();
- } else {
- this.delegatee.write(buffer, offset, length);
- }
- }
-
- /**
- * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
- */
- public void endPrefixMapping(String prefix) throws SAXException {
- // not used atm
- }
-
- /**
- * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
- */
- public void ignorableWhitespace(char[] ch, int start, int length)
- throws SAXException {
- // not used atm
- }
-
- /**
- * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
- */
- public void processingInstruction(String target, String data)
- throws SAXException {
- // not used atm
- }
-
- /**
- * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
- */
- public void setDocumentLocator(Locator locator) {
- // not used atm
- }
-
- /**
- * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
- */
- public void skippedEntity(String name) throws SAXException {
- // not used atm
- }
-
- /**
- * @see org.xml.sax.ContentHandler#startDocument()
- */
- public void startDocument() throws SAXException {
- // not used atm
- }
-
-
- /**
- * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
- */
- public void startPrefixMapping(String prefix, String uri)
- throws SAXException {
- // not used atm
- }
-
- /**
- * @see org.apache.sling.rewriter.Serializer#dispose()
- */
- public void dispose() {
- // nothing to do
- }
+ return new HTMLSerializer();
}
}
Modified: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessingComponentConfigurationImpl.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessingComponentConfigurationImpl.java?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessingComponentConfigurationImpl.java (original)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessingComponentConfigurationImpl.java Tue Aug 18 14:30:34 2009
@@ -19,6 +19,8 @@
import java.util.HashMap;
import java.util.Map;
+import org.apache.sling.api.resource.ValueMap;
+import org.apache.sling.api.wrappers.ValueMapDecorator;
import org.apache.sling.rewriter.ProcessingComponentConfiguration;
/**
@@ -27,7 +29,7 @@
public class ProcessingComponentConfigurationImpl implements ProcessingComponentConfiguration {
/** Empty configuration map. */
- public static final Map<String, Object> EMPTY_CONFIG = new HashMap<String, Object>();
+ public static final ValueMap EMPTY_CONFIG = new ValueMapDecorator(new HashMap<String, Object>());
/** Empty configuration. */
public static final ProcessingComponentConfiguration EMPTY = new ProcessingComponentConfigurationImpl("<empty>", null);
@@ -36,7 +38,7 @@
private final String type;
/** The configuration map. */
- private final Map<String, Object> configuration;
+ private final ValueMap configuration;
/**
* Create a new configuration.
@@ -45,13 +47,13 @@
*/
public ProcessingComponentConfigurationImpl(final String type, final Map<String, Object> config) {
this.type = type;
- this.configuration = (config == null ? EMPTY_CONFIG : config);
+ this.configuration = (config == null ? EMPTY_CONFIG : new ValueMapDecorator(config));
}
/**
* @see org.apache.sling.rewriter.ProcessingComponentConfiguration#getConfiguration()
*/
- public Map<String, Object> getConfiguration() {
+ public ValueMap getConfiguration() {
return this.configuration;
}
Modified: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessorManagerImpl.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessorManagerImpl.java?rev=805427&r1=805426&r2=805427&view=diff
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessorManagerImpl.java (original)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/ProcessorManagerImpl.java Tue Aug 18 14:30:34 2009
@@ -150,7 +150,7 @@
this.addProcessor("*", "", new ProcessorConfigurationImpl(
new String[] {MIME_TYPE_HTML}, // content types
null, // paths,
- null, // extension
+ new String[] {"html"}, // extension
-1, // order
new ProcessingComponentConfigurationImpl("html-generator", null), // generator config
null, // transformer config
Added: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java?rev=805427&view=auto
==============================================================================
--- sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java (added)
+++ sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java Tue Aug 18 14:30:34 2009
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.rewriter.impl;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.sling.rewriter.ProcessingComponentConfiguration;
+import org.apache.sling.rewriter.ProcessingContext;
+import org.apache.sling.rewriter.Serializer;
+
+/**
+ * <p>A pedantic XHTML serializer encoding all recognized entities with their
+ * proper HTML names.</p>
+ *
+ * <p>For configuration options of this serializer, please look at the
+ * {@link org.apache.cocoon.components.serializers.util.EncodingSerializer},
+ * in addition to those, this serializer also supports the specification of a
+ * default doctype. This default will be used if no document type is received
+ * in the SAX events.
+ *
+ * <p>The value <i>mytype</i> can be one of:</p>
+ *
+ * <dl>
+ * <dt>"<code>none</code>"</dt>
+ * <dd>Do not emit any document type declaration.</dd>
+ * <dt>"<code>strict</code>"</dt>
+ * <dd>The XHTML 1.0 Strict document type.</dd>
+ * <dt>"<code>loose</code>"</dt>
+ * <dd>The XHTML 1.0 Transitional document type.</dd>
+ * <dt>"<code>frameset</code>"</dt>
+ * <dd>The XHTML 1.0 Frameset document type.</dd>
+ * </dl>
+ *
+ */
+public class XHTMLSerializer
+ extends org.apache.cocoon.components.serializers.util.XHTMLSerializer
+ implements Serializer {
+ /**
+ * Applies the "encoding", "indent", "omit-xml-declaration" and "doctype-default" settings, then attaches the request and output stream.
+ * @throws IOException if the configured encoding is not supported; the original exception is kept as the cause
+ * @see org.apache.sling.rewriter.Serializer#init(org.apache.sling.rewriter.ProcessingContext, org.apache.sling.rewriter.ProcessingComponentConfiguration)
+ */
+ public void init(ProcessingContext context,
+ ProcessingComponentConfiguration config)
+ throws IOException {
+ final String encoding = config.getConfiguration().get("encoding", "UTF-8");
+ try {
+ this.setEncoding(encoding);
+ } catch (UnsupportedEncodingException exception) {
+ throw (IOException) new IOException("Encoding not supported: " + encoding).initCause(exception);
+ }
+ setIndentPerLevel(config.getConfiguration().get("indent", 0));
+ setOmitXmlDeclaration(config.getConfiguration().get("omit-xml-declaration", "no"));
+ setDoctypeDefault(config.getConfiguration().get("doctype-default", String.class));
+ this.setup(context.getRequest());
+ this.setOutputStream(context.getOutputStream());
+ }
+
+ /**
+ * @see org.apache.sling.rewriter.Serializer#dispose()
+ */
+ public void dispose() {
+ // nothing to do
+ }
+}
Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java
------------------------------------------------------------------------------
svn:keywords = author date id revision rev url
Propchange: sling/trunk/contrib/extensions/rewriter/src/main/java/org/apache/sling/rewriter/impl/XHTMLSerializer.java
------------------------------------------------------------------------------
svn:mime-type = text/plain