You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ma...@apache.org on 2016/12/16 10:50:25 UTC

sqoop git commit: SQOOP-3075: Simplify Unicode character support in source files (introduced by SQOOP-3074) by defining explicit locales instead of using EscapeUtils

Repository: sqoop
Updated Branches:
  refs/heads/trunk 5771a2da5 -> be30a344e


SQOOP-3075: Simplify Unicode character support in
source files (introduced by SQOOP-3074) by
defining explicit locales instead of using
EscapeUtils

(Attila Szabo)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/be30a344
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/be30a344
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/be30a344

Branch: refs/heads/trunk
Commit: be30a344ee28ae60fcce9e9e45a0ec73c93209a7
Parents: 5771a2d
Author: Attila Szabo <ma...@apache.org>
Authored: Fri Dec 16 11:48:52 2016 +0100
Committer: Attila Szabo <ma...@apache.org>
Committed: Fri Dec 16 11:48:52 2016 +0100

----------------------------------------------------------------------
 src/java/org/apache/sqoop/avro/AvroUtil.java          |  6 +-----
 .../org/apache/sqoop/orm/AvroSchemaGenerator.java     |  5 +----
 src/java/org/apache/sqoop/orm/ClassWriter.java        | 14 +++-----------
 src/java/org/apache/sqoop/orm/CompilationManager.java |  4 ++++
 4 files changed, 9 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/be30a344/src/java/org/apache/sqoop/avro/AvroUtil.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/avro/AvroUtil.java b/src/java/org/apache/sqoop/avro/AvroUtil.java
index 8d90130..ee29f14 100644
--- a/src/java/org/apache/sqoop/avro/AvroUtil.java
+++ b/src/java/org/apache/sqoop/avro/AvroUtil.java
@@ -28,7 +28,6 @@ import org.apache.avro.generic.GenericFixed;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.io.DatumReader;
 import org.apache.avro.mapred.FsInput;
-import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -107,10 +106,7 @@ public final class AvroUtil {
    * Convert Column name into Avro column name.
    */
   public static String toAvroColumn(String column) {
-    // We're unescaping identifiers to get the real Unicode characters
-    // back, and not the escaped versions.
-    String candidate = StringEscapeUtils.unescapeJava(
-        ClassWriter.toJavaIdentifier(column));
+    String candidate = ClassWriter.toJavaIdentifier(column);
     return toAvroIdentifier(candidate);
   }
 

http://git-wip-us.apache.org/repos/asf/sqoop/blob/be30a344/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
index 5b1c745..3c31c43 100644
--- a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
+++ b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
@@ -29,7 +29,6 @@ import org.apache.avro.LogicalType;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.avro.Schema.Type;
-import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
@@ -89,9 +88,7 @@ public class AvroSchemaGenerator {
 
     List<Field> fields = new ArrayList<Field>();
     for (String columnName : columnNames) {
-      // We're unescaping identifiers to get the real Unicode characters
-      // back, and not the escaped versions.
-      String cleanedCol = AvroUtil.toAvroIdentifier(StringEscapeUtils.unescapeJava(ClassWriter.toJavaIdentifier(columnName)));
+      String cleanedCol = AvroUtil.toAvroIdentifier(ClassWriter.toJavaIdentifier(columnName));
       List<Integer> columnInfoList = columnInfo.get(columnName);
       int sqlType = columnInfoList.get(0);
       Integer precision = columnInfoList.get(1);

http://git-wip-us.apache.org/repos/asf/sqoop/blob/be30a344/src/java/org/apache/sqoop/orm/ClassWriter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/orm/ClassWriter.java b/src/java/org/apache/sqoop/orm/ClassWriter.java
index 0c8d86d..c18a36f 100644
--- a/src/java/org/apache/sqoop/orm/ClassWriter.java
+++ b/src/java/org/apache/sqoop/orm/ClassWriter.java
@@ -24,6 +24,7 @@ import java.io.IOException;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.nio.charset.StandardCharsets;
 import java.util.Date;
 import java.util.HashSet;
 import java.util.List;
@@ -284,16 +285,7 @@ public class ClassWriter {
       return "_" + output;
     }
 
-    // Calling StringEscapeUtils#escapeJava is required because we'd like to
-    // support Unicode characters in identifiers even if the locale of the host
-    // system is not supporting UTF-8, or by any reason the locale is different
-    // from that. Good example: if a column name would contain a \uC3A1 char
-    // in it's name, though the locale would not support Unicode characters
-    // then the generated java file would contain unrecognizable characters
-    // for the compiler, and javac would fail with a compile error. If the name
-    // of the column would be Alm\uC3A1a then it would be Alm\uC3A1a after the
-    // escaping, and this every places where it's used/
-    return StringEscapeUtils.escapeJava(output);
+    return output;
   }
 
   private String toJavaType(String columnName, int sqlType) {
@@ -1796,7 +1788,7 @@ public class ClassWriter {
     Writer writer = null;
     try {
       ostream = new FileOutputStream(filename);
-      writer = new OutputStreamWriter(ostream);
+      writer = new OutputStreamWriter(ostream, StandardCharsets.UTF_8);
       writer.append(sb.toString());
     } finally {
       if (null != writer) {

http://git-wip-us.apache.org/repos/asf/sqoop/blob/be30a344/src/java/org/apache/sqoop/orm/CompilationManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/orm/CompilationManager.java b/src/java/org/apache/sqoop/orm/CompilationManager.java
index 0a2a87f..c1a656b 100644
--- a/src/java/org/apache/sqoop/orm/CompilationManager.java
+++ b/src/java/org/apache/sqoop/orm/CompilationManager.java
@@ -23,6 +23,7 @@ import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -170,6 +171,9 @@ public class CompilationManager {
     String curClasspath = System.getProperty("java.class.path");
     LOG.debug("Current sqoop classpath = " + curClasspath);
 
+    args.add("-encoding");
+    args.add(StandardCharsets.UTF_8.toString());
+
     args.add("-sourcepath");
     args.add(jarOutDir);