You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cn...@apache.org on 2013/08/01 22:03:32 UTC

svn commit: r1509405 - in /hadoop/common/trunk/hadoop-common-project/hadoop-common: CHANGES.txt src/main/java/org/apache/hadoop/conf/Configuration.java src/test/java/org/apache/hadoop/conf/TestConfiguration.java

Author: cnauroth
Date: Thu Aug  1 20:03:31 2013
New Revision: 1509405

URL: http://svn.apache.org/r1509405
Log:
HADOOP-9801. Configuration#writeXml uses platform default encoding, which may mishandle multi-byte characters. Contributed by Chris Nauroth.

Modified:
    hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1509405&r1=1509404&r2=1509405&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt Thu Aug  1 20:03:31 2013
@@ -333,6 +333,9 @@ Release 2.1.1-beta - UNRELEASED
     HADOOP-9768. chown and chgrp reject users and groups with spaces on platforms
     where spaces are otherwise acceptable. (cnauroth)
 
+    HADOOP-9801. Configuration#writeXml uses platform defaulting encoding, which
+    may mishandle multi-byte characters. (cnauroth)
+
 Release 2.1.0-beta - 2013-08-06
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java?rev=1509405&r1=1509404&r2=1509405&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java Thu Aug  1 20:03:31 2013
@@ -2181,12 +2181,12 @@ public class Configuration implements It
 
   /** 
    * Write out the non-default properties in this configuration to the given
-   * {@link OutputStream}.
+   * {@link OutputStream} using UTF-8 encoding.
    * 
    * @param out the output stream to write to.
    */
   public void writeXml(OutputStream out) throws IOException {
-    writeXml(new OutputStreamWriter(out));
+    writeXml(new OutputStreamWriter(out, "UTF-8"));
   }
 
   /** 

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java?rev=1509405&r1=1509404&r2=1509405&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java Thu Aug  1 20:03:31 2013
@@ -21,9 +21,11 @@ import java.io.BufferedWriter;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.OutputStreamWriter;
 import java.io.StringWriter;
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
@@ -44,6 +46,7 @@ import static org.junit.Assert.assertArr
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration.IntegerRanges;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.net.NetUtils;
 import static org.apache.hadoop.util.PlatformName.IBM_JAVA;
 import org.codehaus.jackson.map.ObjectMapper; 
@@ -53,6 +56,10 @@ public class TestConfiguration extends T
   private Configuration conf;
   final static String CONFIG = new File("./test-config-TestConfiguration.xml").getAbsolutePath();
   final static String CONFIG2 = new File("./test-config2-TestConfiguration.xml").getAbsolutePath();
+  private static final String CONFIG_MULTI_BYTE = new File(
+    "./test-config-multi-byte-TestConfiguration.xml").getAbsolutePath();
+  private static final String CONFIG_MULTI_BYTE_SAVED = new File(
+    "./test-config-multi-byte-saved-TestConfiguration.xml").getAbsolutePath();
   final static Random RAN = new Random();
   final static String XMLHEADER = 
             IBM_JAVA?"<?xml version=\"1.0\" encoding=\"UTF-8\"?><configuration>":
@@ -69,6 +76,8 @@ public class TestConfiguration extends T
     super.tearDown();
     new File(CONFIG).delete();
     new File(CONFIG2).delete();
+    new File(CONFIG_MULTI_BYTE).delete();
+    new File(CONFIG_MULTI_BYTE_SAVED).delete();
   }
   
   private void startConfig() throws IOException{
@@ -101,6 +110,41 @@ public class TestConfiguration extends T
     assertEquals("A", conf.get("prop"));
   }
 
+  /**
+   * Tests use of multi-byte characters in property names and values.  This test
+   * round-trips multi-byte string literals through saving and loading of config
+   * and asserts that the same values were read.
+   */
+  public void testMultiByteCharacters() throws IOException {
+    String priorDefaultEncoding = System.getProperty("file.encoding");
+    try {
+      System.setProperty("file.encoding", "US-ASCII");
+      String name = "multi_byte_\u611b_name";
+      String value = "multi_byte_\u0641_value";
+      out = new BufferedWriter(new OutputStreamWriter(
+        new FileOutputStream(CONFIG_MULTI_BYTE), "UTF-8"));
+      startConfig();
+      declareProperty(name, value, value);
+      endConfig();
+
+      Configuration conf = new Configuration(false);
+      conf.addResource(new Path(CONFIG_MULTI_BYTE));
+      assertEquals(value, conf.get(name));
+      FileOutputStream fos = new FileOutputStream(CONFIG_MULTI_BYTE_SAVED);
+      try {
+        conf.writeXml(fos);
+      } finally {
+        IOUtils.closeStream(fos);
+      }
+
+      conf = new Configuration(false);
+      conf.addResource(new Path(CONFIG_MULTI_BYTE_SAVED));
+      assertEquals(value, conf.get(name));
+    } finally {
+      System.setProperty("file.encoding", priorDefaultEncoding);
+    }
+  }
+
   public void testVariableSubstitution() throws IOException {
     out=new BufferedWriter(new FileWriter(CONFIG));
     startConfig();