You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ra...@apache.org on 2014/10/31 19:22:36 UTC

git commit: HDFS-7309. XMLUtils.mangleXmlString doesn't seem to handle less than sign. (Colin Patrick McCabe via raviprak)

Repository: hadoop
Updated Branches:
  refs/heads/trunk b6c1188b8 -> c7f81dad3


HDFS-7309. XMLUtils.mangleXmlString doesn't seem to handle less than sign. (Colin Patrick McCabe via raviprak)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c7f81dad
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c7f81dad
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c7f81dad

Branch: refs/heads/trunk
Commit: c7f81dad30c391822eed7273278cf5885fa59264
Parents: b6c1188
Author: Ravi Prakash <ra...@altiscale.com>
Authored: Fri Oct 31 11:22:25 2014 -0700
Committer: Ravi Prakash <ra...@altiscale.com>
Committed: Fri Oct 31 11:22:25 2014 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +
 .../OfflineEditsXmlLoader.java                  |  4 +-
 .../offlineImageViewer/PBImageXmlWriter.java    |  3 +-
 .../offlineImageViewer/XmlImageVisitor.java     |  3 +-
 .../org/apache/hadoop/hdfs/util/XMLUtils.java   | 79 +++++++++++++++++---
 .../apache/hadoop/hdfs/util/TestXMLUtils.java   | 31 ++++++--
 6 files changed, 102 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/c7f81dad/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 7010c4a..b1ea79c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -694,6 +694,9 @@ Release 2.6.0 - UNRELEASED
 
   BUG FIXES
 
+    HDFS-7309. XMLUtils.mangleXmlString doesn't seem to handle less than sign
+    (Colin Patrick McCabe via raviprak)
+
     HDFS-6823. dfs.web.authentication.kerberos.principal shows up in logs for 
     insecure HDFS (Allen Wittenauer via raviprak)
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c7f81dad/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java
index cf761cc..1882e58 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java
@@ -177,7 +177,7 @@ class OfflineEditsXmlLoader
   
   @Override
   public void endElement (String uri, String name, String qName) {
-    String str = XMLUtils.unmangleXmlString(cbuf.toString()).trim();
+    String str = XMLUtils.unmangleXmlString(cbuf.toString(), false).trim();
     cbuf = new StringBuffer();
     switch (state) {
     case EXPECT_EDITS_TAG:
@@ -260,4 +260,4 @@ class OfflineEditsXmlLoader
   public void characters (char ch[], int start, int length) {
     cbuf.append(ch, start, length);
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c7f81dad/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java
index df00499..fa8c59d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java
@@ -411,7 +411,8 @@ public final class PBImageXmlWriter {
   }
 
   private PBImageXmlWriter o(final String e, final Object v) {
-    out.print("<" + e + ">" + XMLUtils.mangleXmlString(v.toString()) + "</" + e + ">");
+    out.print("<" + e + ">" +
+        XMLUtils.mangleXmlString(v.toString(), true) + "</" + e + ">");
     return this;
   }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c7f81dad/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java
index 2719109..44593a3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java
@@ -84,6 +84,7 @@ public class XmlImageVisitor extends TextWriterImageVisitor {
   }
 
   private void writeTag(String tag, String value) throws IOException {
-    write("<" + tag + ">" + XMLUtils.mangleXmlString(value) + "</" + tag + ">\n");
+    write("<" + tag + ">" +
+        XMLUtils.mangleXmlString(value, true) + "</" + tag + ">\n");
   }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c7f81dad/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java
index a032408..f23b021 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java
@@ -94,6 +94,23 @@ public class XMLUtils {
     return String.format("\\%0" + NUM_SLASH_POSITIONS + "x;", cp);
   }
 
+  private static String codePointToEntityRef(int cp) {
+    switch (cp) {
+      case '&':
+        return "&amp;";
+      case '\"':
+        return "&quot;";
+      case '\'':
+        return "&apos;";
+      case '<':
+        return "&lt;";
+      case '>':
+        return "&gt;";
+      default:
+        return null;
+    }
+  }
+
   /**
    * Mangle a string so that it can be represented in an XML document.
    * 
@@ -117,7 +134,7 @@ public class XMLUtils {
    *
    * @return        The mangled string.
    */
-  public static String mangleXmlString(String str) {
+  public static String mangleXmlString(String str, boolean createEntityRefs) {
     final StringBuilder bld = new StringBuilder();
     final int length = str.length();
     for (int offset = 0; offset < length; ) {
@@ -126,8 +143,16 @@ public class XMLUtils {
        if (codePointMustBeMangled(cp)) {
          bld.append(mangleCodePoint(cp));
        } else {
-         for (int i = 0; i < len; i++) {
-           bld.append(str.charAt(offset + i));
+         String entityRef = null;
+         if (createEntityRefs) {
+           entityRef = codePointToEntityRef(cp);
+         }
+         if (entityRef != null) {
+           bld.append(entityRef);
+         } else {
+           for (int i = 0; i < len; i++) {
+             bld.append(str.charAt(offset + i));
+           }
          }
        }
        offset += len;
@@ -137,22 +162,42 @@ public class XMLUtils {
 
   /**
    * Demangle a string from an XML document.
-   * See {@link #mangleXmlString(String)} for a description of the mangling
-   * format.
+   * See {@link #mangleXmlString(String, boolean)} for a description of the
+   * mangling format.
    *
    * @param str    The string to be demangled.
    * 
    * @return       The unmangled string
    * @throws       UnmanglingError if the input is malformed.
    */
-  public static String unmangleXmlString(String str)
+  public static String unmangleXmlString(String str, boolean decodeEntityRefs)
         throws UnmanglingError {
     int slashPosition = -1;
     String escapedCp = "";
     StringBuilder bld = new StringBuilder();
+    StringBuilder entityRef = null;
     for (int i = 0; i < str.length(); i++) {
       char ch = str.charAt(i);
-      if ((slashPosition >= 0) && (slashPosition < NUM_SLASH_POSITIONS)) {
+      if (entityRef != null) {
+        entityRef.append(ch);
+        if (ch == ';') {
+          String e = entityRef.toString();
+          if (e.equals("&quot;")) {
+            bld.append("\"");
+          } else if (e.equals("&apos;")) {
+            bld.append("\'");
+          } else if (e.equals("&amp;")) {
+            bld.append("&");
+          } else if (e.equals("&lt;")) {
+            bld.append("<");
+          } else if (e.equals("&gt;")) {
+            bld.append(">");
+          } else {
+            throw new UnmanglingError("Unknown entity ref " + e);
+          }
+          entityRef = null;
+        }
+      } else  if ((slashPosition >= 0) && (slashPosition < NUM_SLASH_POSITIONS)) {
         escapedCp += ch;
         ++slashPosition;
       } else if (slashPosition == NUM_SLASH_POSITIONS) {
@@ -170,10 +215,22 @@ public class XMLUtils {
       } else if (ch == '\\') {
         slashPosition = 0;
       } else {
-        bld.append(ch);
+        boolean startingEntityRef = false;
+        if (decodeEntityRefs) {
+          startingEntityRef = (ch == '&');
+        }
+        if (startingEntityRef) {
+          entityRef = new StringBuilder();
+          entityRef.append("&");
+        } else {
+          bld.append(ch);
+        }
       }
     }
-    if (slashPosition != -1) {
+    if (entityRef != null) {
+      throw new UnmanglingError("unterminated entity ref starting with " +
+          entityRef.toString());
+    } else if (slashPosition != -1) {
       throw new UnmanglingError("unterminated code point escape: string " +
           "broke off in the middle");
     }
@@ -185,12 +242,12 @@ public class XMLUtils {
    *
    * @param contentHandler     the SAX content handler
    * @param tag                the element tag to use  
-   * @param value              the string to put inside the tag
+   * @param val                the string to put inside the tag
    */
   public static void addSaxString(ContentHandler contentHandler,
       String tag, String val) throws SAXException {
     contentHandler.startElement("", "", tag, new AttributesImpl());
-    char c[] = mangleXmlString(val).toCharArray();
+    char c[] = mangleXmlString(val, false).toCharArray();
     contentHandler.characters(c, 0, c.length);
     contentHandler.endElement("", "", tag);
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c7f81dad/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java
index f3ab56c..16df254 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java
@@ -22,11 +22,21 @@ import org.junit.Assert;
 import org.junit.Test;
 
 public class TestXMLUtils {
+  private static void testRoundTripImpl(String str, String expectedMangled,
+                                    boolean encodeEntityRefs) {
+    String mangled = XMLUtils.mangleXmlString(str, encodeEntityRefs);
+    Assert.assertEquals(expectedMangled, mangled);
+    String unmangled = XMLUtils.unmangleXmlString(mangled, encodeEntityRefs);
+    Assert.assertEquals(str, unmangled);
+  }
+
   private static void testRoundTrip(String str, String expectedMangled) {
-    String mangled = XMLUtils.mangleXmlString(str);
-    Assert.assertEquals(mangled, expectedMangled);
-    String unmangled = XMLUtils.unmangleXmlString(mangled);
-    Assert.assertEquals(unmangled, str);
+    testRoundTripImpl(str, expectedMangled, false);
+  }
+
+  private static void testRoundTripWithEntityRefs(String str,
+                          String expectedMangled) {
+    testRoundTripImpl(str, expectedMangled, true);
   }
 
   @Test
@@ -54,16 +64,25 @@ public class TestXMLUtils {
   @Test
   public void testInvalidSequence() throws Exception {
     try {
-      XMLUtils.unmangleXmlString("\\000g;foo");
+      XMLUtils.unmangleXmlString("\\000g;foo", false);
       Assert.fail("expected an unmangling error");
     } catch (UnmanglingError e) {
       // pass through
     }
     try {
-      XMLUtils.unmangleXmlString("\\0");
+      XMLUtils.unmangleXmlString("\\0", false);
       Assert.fail("expected an unmangling error");
     } catch (UnmanglingError e) {
       // pass through
     }
   }
+
+  @Test
+  public void testAddEntityRefs() throws Exception {
+    testRoundTripWithEntityRefs("The Itchy & Scratchy Show",
+        "The Itchy &amp; Scratchy Show");
+    testRoundTripWithEntityRefs("\"He said '1 < 2, but 2 > 1'\"",
+        "&quot;He said &apos;1 &lt; 2, but 2 &gt; 1&apos;&quot;");
+    testRoundTripWithEntityRefs("\u0001 < \u0002", "\\0001; &lt; \\0002;");
+  }
 }