You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ch...@apache.org on 2017/03/04 01:00:06 UTC

[41/50] [abbrv] [text] TEXT-40 - Escape HTML characters only once

TEXT-40 - Escape HTML characters only once

revert as per the issue comments

Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/e9273cd4
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/e9273cd4
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/e9273cd4

Branch: refs/heads/release
Commit: e9273cd4bb3da622ed761c998a6fb6e731538e18
Parents: 40061c7
Author: Sebb <se...@apache.org>
Authored: Wed Feb 22 16:14:46 2017 +0000
Committer: Sebb <se...@apache.org>
Committed: Wed Feb 22 16:14:46 2017 +0000

----------------------------------------------------------------------
 src/changes/changes.xml                         |   9 +-
 .../apache/commons/text/StringEscapeUtils.java  |  93 +----------
 .../text/translate/SingleLookupTranslator.java  | 153 -------------------
 .../commons/text/StringEscapeUtilsTest.java     |  50 ------
 4 files changed, 9 insertions(+), 296 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 94db412..155d6f8 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -45,7 +45,14 @@ The <action> type attribute can be add,update,fix,remove.
   </properties>
   <body>
 
-  <release version="TBA" date="TBA" description="TBA">
+  <release version="TBA" date="TBA" description="
+  
+   Incompatible changes
+   ====================
+   Methods StringEscapeUtils#escapeHtml3Once and StringEscapeUtils#escapeHtml4Once
+   have been removed; see TEXT-40
+  ">
+    <action issue="TEXT-40" type="remove" dev="sebb">Escape HTML characters only once: revert</action>
     <action issue="TEXT-65" type="fix" dev="chtompki">Fixing the 200 checkstyle errors present in 1.0-beta-1</action>
     <action issue="TEXT-63" type="fix" dev="sebb">Mutable fields should be private</action>
   </release>

http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/main/java/org/apache/commons/text/StringEscapeUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/StringEscapeUtils.java b/src/main/java/org/apache/commons/text/StringEscapeUtils.java
index f98f116..05d2348 100644
--- a/src/main/java/org/apache/commons/text/StringEscapeUtils.java
+++ b/src/main/java/org/apache/commons/text/StringEscapeUtils.java
@@ -26,7 +26,6 @@ import org.apache.commons.text.translate.LookupTranslator;
 import org.apache.commons.text.translate.NumericEntityEscaper;
 import org.apache.commons.text.translate.NumericEntityUnescaper;
 import org.apache.commons.text.translate.OctalUnescaper;
-import org.apache.commons.text.translate.SingleLookupTranslator;
 import org.apache.commons.text.translate.UnicodeUnescaper;
 import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;
 
@@ -205,25 +204,6 @@ public class StringEscapeUtils {
             );
 
     /**
-     * The improved translator object for escaping HTML version 3.0.
-     * The 'improved' part of this translator is that it checks if the html is already translated.
-     * This check prevents double, triple, or recursive translations.
-     *
-     * While {@link #escapeHtml3Once(String)} is the expected method of use, this
-     * object allows the HTML escaping functionality to be used
-     * as the foundation for a custom translator.
-     *
-     * Note that, multiple lookup tables should be passed to this translator
-     * instead of passing multiple instances of this translator to the
-     * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
-     * lookup table passed to that instance while deciding whether a value is
-     * already translated or not.
-     */
-    public static final CharSequenceTranslator ESCAPE_HTML3_ONCE =
-            new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE, EntityArrays.ISO8859_1_ESCAPE);
-
-
-    /**
      * Translator object for escaping HTML version 4.0.
      *
      * While {@link #escapeHtml4(String)} is the expected method of use, this
@@ -238,28 +218,6 @@ public class StringEscapeUtils {
             );
 
     /**
-     * The improved translator object for escaping HTML version 4.0.
-     * The 'improved' part of this translator is that it checks if the html is already translated.
-     * This check prevents double, triple, or recursive translations.
-     *
-     * While {@link #escapeHtml4Once(String)} is the expected method of use, this
-     * object allows the HTML escaping functionality to be used
-     * as the foundation for a custom translator.
-     *
-     * Note that, multiple lookup tables should be passed to this translator
-     * instead of passing multiple instances of this translator to the
-     * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
-     * lookup table passed to that instance while deciding whether a value is
-     * already translated or not.
-     */
-    public static final CharSequenceTranslator ESCAPE_HTML4_ONCE =
-            new SingleLookupTranslator(
-                    EntityArrays.BASIC_ESCAPE,
-                    EntityArrays.ISO8859_1_ESCAPE,
-                    EntityArrays.HTML40_EXTENDED_ESCAPE
-            );
-
-    /**
      * Translator object for escaping individual Comma Separated Values.
      *
      * While {@link #escapeCsv(String)} is the expected method of use, this
@@ -702,43 +660,6 @@ public class StringEscapeUtils {
     }
 
     /**
-     * <p>Escapes the characters in a {@code String} using HTML entities.
-     * But escapes them only once. i.e. does not escape already escaped characters.</p>
-     *
-     * <p>
-     * For example:
-     * </p>
-     * <p><code>"bread" &amp; "butter"</code></p>
-     * becomes:
-     * <p>
-     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
-     * </p>
-     *
-     * <p>
-     * But:
-     * </p>
-     * <p><code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code></p>
-     * remains unaffected.
-     *
-     * <p>Supports all known HTML 4.0 entities, including funky accents.
-     * Note that the commonly used apostrophe escape character (&amp;apos;)
-     * is not a legal entity and so is not supported). </p>
-     *
-     * @param input  the {@code String} to escape, may be null
-     * @return a new escaped {@code String}, {@code null} if null string input
-     *
-     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
-     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
-     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
-     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
-     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
-     */
-    public static final String escapeHtml4Once(final String input) {
-        return ESCAPE_HTML4_ONCE.translate(input);
-    }
-
-
-    /**
      * <p>Escapes the characters in a {@code String} using HTML entities.</p>
      * <p>Supports only the HTML 3.0 entities. </p>
      *
@@ -749,18 +670,6 @@ public class StringEscapeUtils {
         return ESCAPE_HTML3.translate(input);
     }
 
-    /**
-     * <p>Escapes the characters in a {@code String} using HTML entities.
-     * But escapes them only once. i.e. does not escape already escaped characters.</p>
-     * <p>Supports only the HTML 3.0 entities. </p>
-     *
-     * @param input  the {@code String} to escape, may be null
-     * @return a new escaped {@code String}, {@code null} if null string input
-     */
-    public static final String escapeHtml3Once(final String input) {
-        return ESCAPE_HTML3_ONCE.translate(input);
-    }
-
     //-----------------------------------------------------------------------
     /**
      * <p>Unescapes a string containing entity escapes to a string
@@ -768,7 +677,7 @@ public class StringEscapeUtils {
      * escapes. Supports HTML 4.0 entities.</p>
      *
      * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
-     * will become {@code "<Fran\ufffdais>"}</p>
+     * will become {@code "<Français>"}</p>
      *
      * <p>If an entity is unrecognized, it is left alone, and inserted
      * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will

http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java b/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java
deleted file mode 100644
index 8fafab8..0000000
--- a/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.translate;
-
-import java.io.IOException;
-import java.io.Writer;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-
-/**
- * Translates a value using a lookup table.
- * But doesn't translate if that value is already translated.
- *
- * @since 1.0
- */
-public class SingleLookupTranslator extends CharSequenceTranslator {
-
-    /** The lookupMap to be used for translation. */
-    private final Map<String, String> lookupMap;
-    /** The first character of each key in the lookupMap. */
-    private final HashSet<Character> prefixSet;
-    /** The length of the shortest key in the lookupMap. */
-    private final int shortest;
-    /** The length of the longest key in the lookupMap. */
-    private final int longest;
-    /** The length of the shortest value in the lookupMap. */
-    private final int shortestValue;
-    /** The length of the longest value in the lookupMap. */
-    private final int longestValue;
-
-    /**
-     * Define the look tables to be used in translation.
-     * <p>
-     * Note that, as of Lang 3.1, the key to the lookup table is converted to a
-     * java.lang.String. This is because we need the key to support hashCode and
-     * equals(Object), allowing it to be the key for a HashMap. See LANG-882.
-     * <p>
-     * Also note that, multiple lookup tables should be passed to this translator
-     * instead of passing multiple instances of this translator to the
-     * AggregateTranslator. Because, this translator only checks the values of the
-     * lookup table passed to this instance while deciding whether a value is
-     * already translated or not.
-     *
-     * @param inputMaps an array of Map&lt;CharSequence, CharSequence&gt;.
-     */
-    public SingleLookupTranslator(Map<CharSequence, CharSequence>... inputMaps) {
-        Map<CharSequence, CharSequence> lookup = new HashMap<>();
-        for (Map<CharSequence, CharSequence> input : inputMaps) {
-            Iterator<Map.Entry<CharSequence, CharSequence>> it = input.entrySet().iterator();
-            while (it.hasNext()) {
-                Map.Entry<CharSequence, CharSequence> pair = it.next();
-                lookup.put(pair.getKey(), pair.getValue());
-            }
-        }
-        lookupMap = new HashMap<String, String>();
-        prefixSet = new HashSet<Character>();
-        int _shortest = Integer.MAX_VALUE;
-        int _longest = 0;
-        int _shortestValue = Integer.MAX_VALUE;
-        int _longestValue = 0;
-        if (lookup != null) {
-            Iterator<Map.Entry<CharSequence, CharSequence>> it = lookup.entrySet().iterator();
-            while (it.hasNext()) {
-                Map.Entry<CharSequence, CharSequence> pair = it.next();
-                this.lookupMap.put(pair.getKey().toString(), pair.getValue().toString());
-                this.prefixSet.add(pair.getKey().charAt(0));
-                final int sz = pair.getKey().length();
-                if (sz < _shortest) {
-                    _shortest = sz;
-                }
-                if (sz > _longest) {
-                    _longest = sz;
-                }
-                final int sizeOfValue = lookup.get(pair.getKey()).length();
-                if (sizeOfValue < _shortestValue) {
-                    _shortestValue = sizeOfValue;
-                }
-                if (sizeOfValue > _longestValue) {
-                    _longestValue = sizeOfValue;
-                }
-            }
-        }
-        shortest = _shortest;
-        longest = _longest;
-        shortestValue = _shortestValue;
-        longestValue = _longestValue;
-    }
-
-    /**
-     * Translate a set of codepoints, represented by an int index into a CharSequence,
-     * into another set of codepoints. The number of codepoints consumed must be returned,
-     * and the only IOExceptions thrown must be from interacting with the Writer so that
-     * the top level API may reliably ignore StringWriter IOExceptions.
-     *
-     * @param input CharSequence that is being translated
-     * @param index int representing the current point of translation
-     * @param out   Writer to translate the text to
-     * @return int count of codepoints consumed
-     * @throws IOException if and only if the Writer produces an IOException
-     */
-    @Override
-    public int translate(CharSequence input, int index, Writer out) throws IOException {
-        // check if already translated
-        int maxValue = longestValue;
-        if (index + maxValue > input.length()) {
-            maxValue = input.length() - index;
-        }
-        // implement greedy algorithm to check all the possible 'value' matches
-        // for which we need to skip translation.
-        for (int i = maxValue; i >= shortestValue; i--) {
-            final CharSequence subSeq = input.subSequence(index, index + i);
-            // If the sub-string is already translated, return without translating.
-            if (lookupMap.containsValue(subSeq.toString())) {
-                return 0;
-            }
-        }
-
-        // check if translation exists for the input at position index
-        if (prefixSet.contains(input.charAt(index))) {
-            int max = longest;
-            if (index + longest > input.length()) {
-                max = input.length() - index;
-            }
-            // implement greedy algorithm by trying maximum match first
-            for (int i = max; i >= shortest; i--) {
-                final CharSequence subSeq = input.subSequence(index, index + i);
-                final String result = lookupMap.get(subSeq.toString());
-
-                if (result != null) {
-                    out.write(result);
-                    return i;
-                }
-            }
-        }
-        return 0;
-    }
-}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
index f716763..ef9d8ab 100644
--- a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
+++ b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
@@ -241,56 +241,6 @@ public class StringEscapeUtilsTest {
     }
 
     @Test
-    public void testEscapeHtml4Once() {
-        for (final String[] element : HTML_ESCAPES) {
-            final String message = element[0];
-            final String expected = element[1];
-            final String original = element[2];
-            assertEquals(message, expected, StringEscapeUtils.escapeHtml4Once(original));
-            assertEquals(message, expected, StringEscapeUtils.escapeHtml4Once(expected));
-            final StringWriter sw = new StringWriter();
-            try {
-                StringEscapeUtils.ESCAPE_HTML4_ONCE.translate(original, sw);
-            } catch (final IOException e) {
-            }
-            final String actual = original == null ? null : sw.toString();
-            assertEquals(message, expected, actual);
-            final StringWriter sw2 = new StringWriter();
-            try {
-                StringEscapeUtils.ESCAPE_HTML4_ONCE.translate(expected, sw2);
-            } catch (final IOException e) {
-            }
-            final String actual2 = original == null ? null : sw2.toString();
-            assertEquals(message, expected, actual2);
-        }
-    }
-
-    @Test
-    public void testEscapeHtml3Once() {
-        for (final String[] element : HTML_ESCAPES) {
-            final String message = element[0];
-            final String expected = element[1];
-            final String original = element[2];
-            assertEquals(message, expected, StringEscapeUtils.escapeHtml3Once(original));
-            assertEquals(message, expected, StringEscapeUtils.escapeHtml3Once(expected));
-            final StringWriter sw = new StringWriter();
-            try {
-                StringEscapeUtils.ESCAPE_HTML3_ONCE.translate(original, sw);
-            } catch (final IOException e) {
-            }
-            final String actual = original == null ? null : sw.toString();
-            assertEquals(message, expected, actual);
-            final StringWriter sw2 = new StringWriter();
-            try {
-                StringEscapeUtils.ESCAPE_HTML3_ONCE.translate(expected, sw2);
-            } catch (final IOException e) {
-            }
-            final String actual2 = original == null ? null : sw2.toString();
-            assertEquals(message, expected, actual2);
-        }
-    }
-
-    @Test
     public void testUnescapeHtml4() {
         for (final String[] element : HTML_ESCAPES) {
             final String message = element[0];