You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ch...@apache.org on 2017/03/04 01:00:06 UTC
[41/50] [abbrv] [text] TEXT-40 - Escape HTML characters only once
TEXT-40 - Escape HTML characters only once
revert as per the issue comments
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/e9273cd4
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/e9273cd4
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/e9273cd4
Branch: refs/heads/release
Commit: e9273cd4bb3da622ed761c998a6fb6e731538e18
Parents: 40061c7
Author: Sebb <se...@apache.org>
Authored: Wed Feb 22 16:14:46 2017 +0000
Committer: Sebb <se...@apache.org>
Committed: Wed Feb 22 16:14:46 2017 +0000
----------------------------------------------------------------------
src/changes/changes.xml | 9 +-
.../apache/commons/text/StringEscapeUtils.java | 93 +----------
.../text/translate/SingleLookupTranslator.java | 153 -------------------
.../commons/text/StringEscapeUtilsTest.java | 50 ------
4 files changed, 9 insertions(+), 296 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 94db412..155d6f8 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -45,7 +45,14 @@ The <action> type attribute can be add,update,fix,remove.
</properties>
<body>
- <release version="TBA" date="TBA" description="TBA">
+ <release version="TBA" date="TBA" description="
+
+ Incompatible changes
+ ====================
+ Methods StringEscapeUtils#escapeHtml3Once and StringEscapeUtils#escapeHtml4Once
+ have been removed; see TEXT-40
+ ">
+ <action issue="TEXT-40" type="remove" dev="sebb">Escape HTML characters only once: revert</action>
<action issue="TEXT-65" type="fix" dev="chtompki">Fixing the 200 checkstyle errors present in 1.0-beta-1</action>
<action issue="TEXT-63" type="fix" dev="sebb">Mutable fields should be private</action>
</release>
http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/main/java/org/apache/commons/text/StringEscapeUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/StringEscapeUtils.java b/src/main/java/org/apache/commons/text/StringEscapeUtils.java
index f98f116..05d2348 100644
--- a/src/main/java/org/apache/commons/text/StringEscapeUtils.java
+++ b/src/main/java/org/apache/commons/text/StringEscapeUtils.java
@@ -26,7 +26,6 @@ import org.apache.commons.text.translate.LookupTranslator;
import org.apache.commons.text.translate.NumericEntityEscaper;
import org.apache.commons.text.translate.NumericEntityUnescaper;
import org.apache.commons.text.translate.OctalUnescaper;
-import org.apache.commons.text.translate.SingleLookupTranslator;
import org.apache.commons.text.translate.UnicodeUnescaper;
import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;
@@ -205,25 +204,6 @@ public class StringEscapeUtils {
);
/**
- * The improved translator object for escaping HTML version 3.0.
- * The 'improved' part of this translator is that it checks if the html is already translated.
- * This check prevents double, triple, or recursive translations.
- *
- * While {@link #escapeHtml3Once(String)} is the expected method of use, this
- * object allows the HTML escaping functionality to be used
- * as the foundation for a custom translator.
- *
- * Note that, multiple lookup tables should be passed to this translator
- * instead of passing multiple instances of this translator to the
- * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
- * lookup table passed to that instance while deciding whether a value is
- * already translated or not.
- */
- public static final CharSequenceTranslator ESCAPE_HTML3_ONCE =
- new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE, EntityArrays.ISO8859_1_ESCAPE);
-
-
- /**
* Translator object for escaping HTML version 4.0.
*
* While {@link #escapeHtml4(String)} is the expected method of use, this
@@ -238,28 +218,6 @@ public class StringEscapeUtils {
);
/**
- * The improved translator object for escaping HTML version 4.0.
- * The 'improved' part of this translator is that it checks if the html is already translated.
- * This check prevents double, triple, or recursive translations.
- *
- * While {@link #escapeHtml4Once(String)} is the expected method of use, this
- * object allows the HTML escaping functionality to be used
- * as the foundation for a custom translator.
- *
- * Note that, multiple lookup tables should be passed to this translator
- * instead of passing multiple instances of this translator to the
- * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
- * lookup table passed to that instance while deciding whether a value is
- * already translated or not.
- */
- public static final CharSequenceTranslator ESCAPE_HTML4_ONCE =
- new SingleLookupTranslator(
- EntityArrays.BASIC_ESCAPE,
- EntityArrays.ISO8859_1_ESCAPE,
- EntityArrays.HTML40_EXTENDED_ESCAPE
- );
-
- /**
* Translator object for escaping individual Comma Separated Values.
*
* While {@link #escapeCsv(String)} is the expected method of use, this
@@ -702,43 +660,6 @@ public class StringEscapeUtils {
}
/**
- * <p>Escapes the characters in a {@code String} using HTML entities.
- * But escapes them only once. i.e. does not escape already escaped characters.</p>
- *
- * <p>
- * For example:
- * </p>
- * <p><code>"bread" & "butter"</code></p>
- * becomes:
- * <p>
- * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>.
- * </p>
- *
- * <p>
- * But:
- * </p>
- * <p><code>&quot;bread&quot; &amp; &quot;butter&quot;</code></p>
- * remains unaffected.
- *
- * <p>Supports all known HTML 4.0 entities, including funky accents.
- * Note that the commonly used apostrophe escape character (&apos;)
- * is not a legal entity and so is not supported). </p>
- *
- * @param input the {@code String} to escape, may be null
- * @return a new escaped {@code String}, {@code null} if null string input
- *
- * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
- * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
- * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
- * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
- * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
- */
- public static final String escapeHtml4Once(final String input) {
- return ESCAPE_HTML4_ONCE.translate(input);
- }
-
-
- /**
* <p>Escapes the characters in a {@code String} using HTML entities.</p>
* <p>Supports only the HTML 3.0 entities. </p>
*
@@ -749,18 +670,6 @@ public class StringEscapeUtils {
return ESCAPE_HTML3.translate(input);
}
- /**
- * <p>Escapes the characters in a {@code String} using HTML entities.
- * But escapes them only once. i.e. does not escape already escaped characters.</p>
- * <p>Supports only the HTML 3.0 entities. </p>
- *
- * @param input the {@code String} to escape, may be null
- * @return a new escaped {@code String}, {@code null} if null string input
- */
- public static final String escapeHtml3Once(final String input) {
- return ESCAPE_HTML3_ONCE.translate(input);
- }
-
//-----------------------------------------------------------------------
/**
* <p>Unescapes a string containing entity escapes to a string
@@ -768,7 +677,7 @@ public class StringEscapeUtils {
* escapes. Supports HTML 4.0 entities.</p>
*
* <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
- * will become {@code "<Français>"}</p>
+ * will become {@code "<Français>"}</p>
*
* <p>If an entity is unrecognized, it is left alone, and inserted
* verbatim into the result string. e.g. {@code ">&zzzz;x"} will
http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java b/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java
deleted file mode 100644
index 8fafab8..0000000
--- a/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.translate;
-
-import java.io.IOException;
-import java.io.Writer;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-
-/**
- * Translates a value using a lookup table.
- * But doesn't translate if that value is already translated.
- *
- * @since 1.0
- */
-public class SingleLookupTranslator extends CharSequenceTranslator {
-
- /** The lookupMap to be used for translation. */
- private final Map<String, String> lookupMap;
- /** The first character of each key in the lookupMap. */
- private final HashSet<Character> prefixSet;
- /** The length of the shortest key in the lookupMap. */
- private final int shortest;
- /** The length of the longest key in the lookupMap. */
- private final int longest;
- /** The length of the shortest value in the lookupMap. */
- private final int shortestValue;
- /** The length of the longest value in the lookupMap. */
- private final int longestValue;
-
- /**
- * Define the look tables to be used in translation.
- * <p>
- * Note that, as of Lang 3.1, the key to the lookup table is converted to a
- * java.lang.String. This is because we need the key to support hashCode and
- * equals(Object), allowing it to be the key for a HashMap. See LANG-882.
- * <p>
- * Also note that, multiple lookup tables should be passed to this translator
- * instead of passing multiple instances of this translator to the
- * AggregateTranslator. Because, this translator only checks the values of the
- * lookup table passed to this instance while deciding whether a value is
- * already translated or not.
- *
- * @param inputMaps an array of Map<CharSequence, CharSequence>.
- */
- public SingleLookupTranslator(Map<CharSequence, CharSequence>... inputMaps) {
- Map<CharSequence, CharSequence> lookup = new HashMap<>();
- for (Map<CharSequence, CharSequence> input : inputMaps) {
- Iterator<Map.Entry<CharSequence, CharSequence>> it = input.entrySet().iterator();
- while (it.hasNext()) {
- Map.Entry<CharSequence, CharSequence> pair = it.next();
- lookup.put(pair.getKey(), pair.getValue());
- }
- }
- lookupMap = new HashMap<String, String>();
- prefixSet = new HashSet<Character>();
- int _shortest = Integer.MAX_VALUE;
- int _longest = 0;
- int _shortestValue = Integer.MAX_VALUE;
- int _longestValue = 0;
- if (lookup != null) {
- Iterator<Map.Entry<CharSequence, CharSequence>> it = lookup.entrySet().iterator();
- while (it.hasNext()) {
- Map.Entry<CharSequence, CharSequence> pair = it.next();
- this.lookupMap.put(pair.getKey().toString(), pair.getValue().toString());
- this.prefixSet.add(pair.getKey().charAt(0));
- final int sz = pair.getKey().length();
- if (sz < _shortest) {
- _shortest = sz;
- }
- if (sz > _longest) {
- _longest = sz;
- }
- final int sizeOfValue = lookup.get(pair.getKey()).length();
- if (sizeOfValue < _shortestValue) {
- _shortestValue = sizeOfValue;
- }
- if (sizeOfValue > _longestValue) {
- _longestValue = sizeOfValue;
- }
- }
- }
- shortest = _shortest;
- longest = _longest;
- shortestValue = _shortestValue;
- longestValue = _longestValue;
- }
-
- /**
- * Translate a set of codepoints, represented by an int index into a CharSequence,
- * into another set of codepoints. The number of codepoints consumed must be returned,
- * and the only IOExceptions thrown must be from interacting with the Writer so that
- * the top level API may reliably ignore StringWriter IOExceptions.
- *
- * @param input CharSequence that is being translated
- * @param index int representing the current point of translation
- * @param out Writer to translate the text to
- * @return int count of codepoints consumed
- * @throws IOException if and only if the Writer produces an IOException
- */
- @Override
- public int translate(CharSequence input, int index, Writer out) throws IOException {
- // check if already translated
- int maxValue = longestValue;
- if (index + maxValue > input.length()) {
- maxValue = input.length() - index;
- }
- // implement greedy algorithm to check all the possible 'value' matches
- // for which we need to skip translation.
- for (int i = maxValue; i >= shortestValue; i--) {
- final CharSequence subSeq = input.subSequence(index, index + i);
- // If the sub-string is already translated, return without translating.
- if (lookupMap.containsValue(subSeq.toString())) {
- return 0;
- }
- }
-
- // check if translation exists for the input at position index
- if (prefixSet.contains(input.charAt(index))) {
- int max = longest;
- if (index + longest > input.length()) {
- max = input.length() - index;
- }
- // implement greedy algorithm by trying maximum match first
- for (int i = max; i >= shortest; i--) {
- final CharSequence subSeq = input.subSequence(index, index + i);
- final String result = lookupMap.get(subSeq.toString());
-
- if (result != null) {
- out.write(result);
- return i;
- }
- }
- }
- return 0;
- }
-}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
index f716763..ef9d8ab 100644
--- a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
+++ b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
@@ -241,56 +241,6 @@ public class StringEscapeUtilsTest {
}
@Test
- public void testEscapeHtml4Once() {
- for (final String[] element : HTML_ESCAPES) {
- final String message = element[0];
- final String expected = element[1];
- final String original = element[2];
- assertEquals(message, expected, StringEscapeUtils.escapeHtml4Once(original));
- assertEquals(message, expected, StringEscapeUtils.escapeHtml4Once(expected));
- final StringWriter sw = new StringWriter();
- try {
- StringEscapeUtils.ESCAPE_HTML4_ONCE.translate(original, sw);
- } catch (final IOException e) {
- }
- final String actual = original == null ? null : sw.toString();
- assertEquals(message, expected, actual);
- final StringWriter sw2 = new StringWriter();
- try {
- StringEscapeUtils.ESCAPE_HTML4_ONCE.translate(expected, sw2);
- } catch (final IOException e) {
- }
- final String actual2 = original == null ? null : sw2.toString();
- assertEquals(message, expected, actual2);
- }
- }
-
- @Test
- public void testEscapeHtml3Once() {
- for (final String[] element : HTML_ESCAPES) {
- final String message = element[0];
- final String expected = element[1];
- final String original = element[2];
- assertEquals(message, expected, StringEscapeUtils.escapeHtml3Once(original));
- assertEquals(message, expected, StringEscapeUtils.escapeHtml3Once(expected));
- final StringWriter sw = new StringWriter();
- try {
- StringEscapeUtils.ESCAPE_HTML3_ONCE.translate(original, sw);
- } catch (final IOException e) {
- }
- final String actual = original == null ? null : sw.toString();
- assertEquals(message, expected, actual);
- final StringWriter sw2 = new StringWriter();
- try {
- StringEscapeUtils.ESCAPE_HTML3_ONCE.translate(expected, sw2);
- } catch (final IOException e) {
- }
- final String actual2 = original == null ? null : sw2.toString();
- assertEquals(message, expected, actual2);
- }
- }
-
- @Test
public void testUnescapeHtml4() {
for (final String[] element : HTML_ESCAPES) {
final String message = element[0];