You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by dk...@apache.org on 2021/01/12 22:07:24 UTC

[sling-org-apache-sling-rewriter] branch master updated: Replacing Cocoon with Commons Text for HTML entity encoding

This is an automated email from the ASF dual-hosted git repository.

dklco pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-rewriter.git


The following commit(s) were added to refs/heads/master by this push:
     new 5d16e89  Replacing Cocoon with Commons Text for HTML entity encoding
5d16e89 is described below

commit 5d16e8986434c24a82917306f360a9bdceb36430
Author: Dan Klco <dk...@apache.org>
AuthorDate: Tue Jan 12 17:07:08 2021 -0500

    Replacing Cocoon with Commons Text for HTML entity encoding
---
 pom.xml                                            |  8 ++--
 .../rewriter/impl/components/Html5Serializer.java  | 56 +++-------------------
 .../impl/components/Html5SerializerTest.java       | 14 ------
 .../rewriter/it/tests/RewriterTestSupport.java     |  2 +-
 4 files changed, 12 insertions(+), 68 deletions(-)

diff --git a/pom.xml b/pom.xml
index 4733a55..a86fa1e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -99,11 +99,11 @@
 
     <dependencies>
         <dependency>
-            <groupId>commons-lang</groupId>
-            <artifactId>commons-lang</artifactId>
-            <version>2.4</version>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-text</artifactId>
+            <version>1.9</version>
             <scope>provided</scope>
-        </dependency>
+         </dependency>
         <dependency>
             <groupId>org.apache.sling</groupId>
             <artifactId>org.apache.sling.api</artifactId>
diff --git a/src/main/java/org/apache/sling/rewriter/impl/components/Html5Serializer.java b/src/main/java/org/apache/sling/rewriter/impl/components/Html5Serializer.java
index ad14116..a247424 100644
--- a/src/main/java/org/apache/sling/rewriter/impl/components/Html5Serializer.java
+++ b/src/main/java/org/apache/sling/rewriter/impl/components/Html5Serializer.java
@@ -18,15 +18,11 @@ package org.apache.sling.rewriter.impl.components;
 
 import java.io.IOException;
 import java.io.PrintWriter;
-import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
 
-import org.apache.cocoon.components.serializers.encoding.Charset;
-import org.apache.cocoon.components.serializers.encoding.CharsetFactory;
-import org.apache.cocoon.components.serializers.encoding.Encoder;
-import org.apache.cocoon.components.serializers.encoding.HTMLEncoder;
+import org.apache.commons.text.StringEscapeUtils;
 import org.apache.sling.rewriter.ProcessingComponentConfiguration;
 import org.apache.sling.rewriter.ProcessingContext;
 import org.apache.sling.rewriter.Serializer;
@@ -58,16 +54,16 @@ public class Html5Serializer implements Serializer {
     }
     private PrintWriter writer;
 
-    private Charset charset;
-
-    private Encoder encoder;
-
     @Override
     public void characters(char[] buffer, int offset, int length) throws SAXException {
         if (length == 0) {
             writer.flush();
         } else {
-            writeEncoded(buffer, offset, length);
+            if (offset < 0 || offset + length > buffer.length) {
+                throw new SAXException("Offset / length out of bounds");
+            }
+            writer.write(
+                    StringEscapeUtils.escapeHtml4(new String(Arrays.copyOfRange(buffer, offset, offset + length))));
         }
     }
 
@@ -107,9 +103,6 @@ public class Html5Serializer implements Serializer {
         } else {
             writer = context.getWriter();
         }
-        this.charset = CharsetFactory.newInstance()
-                .getCharset(config.getConfiguration().get("encoding", StandardCharsets.UTF_8.name()));
-        this.encoder = new HTMLEncoder();
     }
 
     @Override
@@ -151,8 +144,7 @@ public class Html5Serializer implements Serializer {
 
             writer.write(CHAR_EQ);
             writer.write(CHAR_QT);
-            char[] data = value.toCharArray();
-            this.writeEncoded(data, 0, data.length);
+            writer.write(StringEscapeUtils.escapeHtml4(value));
             writer.write(CHAR_QT);
         }
 
@@ -184,38 +176,4 @@ public class Html5Serializer implements Serializer {
         // Nothing required
     }
 
-    /**
-     * Encode and write a specific part of an array of characters.
-     */
-    private void writeEncoded(char[] data, int start, int length) throws SAXException {
-        int end = start + length;
-
-        if (data == null) {
-            throw new SAXException("Invalid data, null");
-        }
-        if ((start < 0) || (start > data.length) || (length < 0) || (end > data.length) || (end < 0)) {
-            throw new SAXException("Invalid data, out of bounds");
-        }
-        if (length == 0) {
-            return;
-        }
-
-        for (int i = start; i < end; i++) {
-            char c = data[i];
-
-            if (this.charset.allows(c) && this.encoder.allows(c)) {
-                continue;
-            }
-
-            if (start != i) {
-                writer.write(data, start, i - start);
-            }
-            writer.write(this.encoder.encode(c));
-            start = i + 1;
-        }
-        if (start != end) {
-            writer.write(data, start, end - start);
-        }
-    }
-
 }
diff --git a/src/test/java/org/apache/sling/rewriter/impl/components/Html5SerializerTest.java b/src/test/java/org/apache/sling/rewriter/impl/components/Html5SerializerTest.java
index 6601e5f..56671e9 100644
--- a/src/test/java/org/apache/sling/rewriter/impl/components/Html5SerializerTest.java
+++ b/src/test/java/org/apache/sling/rewriter/impl/components/Html5SerializerTest.java
@@ -22,11 +22,9 @@ import static org.junit.Assert.fail;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.io.StringWriter;
-import java.io.UnsupportedEncodingException;
 import java.util.Collections;
 
 import org.apache.sling.api.wrappers.ValueMapDecorator;
-import org.apache.sling.rewriter.ProcessingComponentConfiguration;
 import org.apache.sling.rewriter.ProcessingContext;
 import org.apache.sling.rewriter.impl.ProcessingComponentConfigurationImpl;
 import org.junit.Before;
@@ -55,18 +53,6 @@ public class Html5SerializerTest {
         testSerializer.init(context, config);
     }
 
-    @Test
-    public void testInvalidCharset() throws IOException {
-        Html5Serializer serializer = new Html5Serializer();
-        ProcessingComponentConfiguration invalidConfig = new ProcessingComponentConfigurationImpl("/apps/config",
-                new ValueMapDecorator(Collections.singletonMap("encoding", "NOT-VALID-99")));
-        try {
-            serializer.init(context, invalidConfig);
-            fail();
-        } catch (UnsupportedEncodingException | IllegalArgumentException e) {
-            // caught expected exception
-        }
-    }
 
     @Test
     public void testNoWriter() throws IOException {
diff --git a/src/test/java/org/apache/sling/rewriter/it/tests/RewriterTestSupport.java b/src/test/java/org/apache/sling/rewriter/it/tests/RewriterTestSupport.java
index d138c24..c6c32c9 100644
--- a/src/test/java/org/apache/sling/rewriter/it/tests/RewriterTestSupport.java
+++ b/src/test/java/org/apache/sling/rewriter/it/tests/RewriterTestSupport.java
@@ -68,7 +68,7 @@ public abstract class RewriterTestSupport extends TestSupport {
                 .asOption(),
             // testing
             mavenBundle().groupId("org.jsoup").artifactId("jsoup").versionAsInProject(),
-            mavenBundle().groupId("org.apache.cocoon").artifactId("cocoon-serializers-charsets").versionAsInProject(),
+            mavenBundle().groupId("org.apache.commons").artifactId("commons-text").versionAsInProject(),
             mavenBundle().groupId("org.apache.servicemix.bundles").artifactId("org.apache.servicemix.bundles.hamcrest").versionAsInProject(),
             slingResourcePresence(),
             junitBundles()