You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ja...@apache.org on 2016/05/19 14:30:44 UTC

[3/3] sqoop git commit: SQOOP-2920: sqoop performance deteriorates significantly on wide datasets; sqoop 100% on cpu

SQOOP-2920: sqoop performance deteriorates significantly on wide datasets; sqoop 100% on cpu

(Attila Szabo via Jarek Jarcec Cecho)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/28bbe4d4
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/28bbe4d4
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/28bbe4d4

Branch: refs/heads/trunk
Commit: 28bbe4d4614d55f834e58b07285e32a97522e329
Parents: 5779aec
Author: Jarek Jarcec Cecho <ja...@apache.org>
Authored: Thu May 19 07:27:35 2016 -0700
Committer: Jarek Jarcec Cecho <ja...@apache.org>
Committed: Thu May 19 07:27:35 2016 -0700

----------------------------------------------------------------------
 src/java/org/apache/sqoop/orm/ClassWriter.java  | 83 ++++++++++----------
 .../apache/sqoop/orm/CompilationManager.java    |  8 +-
 .../com/cloudera/sqoop/orm/TestClassWriter.java | 63 ++++++++++++++-
 3 files changed, 107 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/28bbe4d4/src/java/org/apache/sqoop/orm/ClassWriter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/orm/ClassWriter.java b/src/java/org/apache/sqoop/orm/ClassWriter.java
index 23a9c41..9d91887 100644
--- a/src/java/org/apache/sqoop/orm/ClassWriter.java
+++ b/src/java/org/apache/sqoop/orm/ClassWriter.java
@@ -1064,57 +1064,49 @@ public class ClassWriter {
    * @param colNames - ordered list of column names for table.
    * @param sb - StringBuilder to append code to
    */
-  private void generateSetField(Map<String, Integer> columnTypes,
-      String [] colNames, String [] rawColNames, StringBuilder sb) {
-
-    int numberOfMethods =
-            this.getNumberOfMethods(colNames, maxColumnsPerMethod);
+  private void generateSetField(Map<String, Integer> columnTypes, String[] colNames, String[] rawColNames,
+      StringBuilder sb) {
+    String sep = System.getProperty("line.separator");
+    sb.append("  public void setField(String __fieldName, Object __fieldVal) " + "{" + sep);
+    sb.append("    if (!setters.containsKey(__fieldName)) {" + sep);
+    sb.append("      throw new RuntimeException(\"No such field:\"+__fieldName);" + sep);
+    sb.append("    }" + sep);
+    sb.append("    setters.get(__fieldName).setField(__fieldVal);" + sep);
+    sb.append("  }\n" + sep);
+  }
 
-    sb.append("  public void setField(String __fieldName, Object __fieldVal) "
-        + "{\n");
-    if (numberOfMethods > 1) {
-      boolean first = true;
-      for (int i = 0; i < numberOfMethods; ++i) {
-        if (!first) {
-          sb.append("    else");
-        }
-        sb.append("    if (this.setField" + i
-                + "(__fieldName, __fieldVal)) {\n");
-        sb.append("      return;\n");
-        sb.append("    }\n");
-        first = false;
-      }
-    } else {
-      boolean first = true;
-      for (int i = 0; i < colNames.length; i++) {
-        int sqlType = columnTypes.get(colNames[i]);
-        String javaType = toJavaType(colNames[i], sqlType);
+  private void generateConstructorAndInitMethods(Map<String, Integer> colTypes, String[] colNames, String[] rawColNames,
+      String typeName, StringBuilder sb) {
+    String sep = System.getProperty("line.separator");
+    int numberOfMethods = getNumberOfMethods(colNames, maxColumnsPerMethod);
+    for (int methodNumber = 0; methodNumber < numberOfMethods; ++methodNumber) {
+      sb.append("  private void init" + methodNumber + "() {" + sep);
+      for (int i = methodNumber * maxColumnsPerMethod; i < topBoundary(colNames, methodNumber,
+          maxColumnsPerMethod); ++i) {
+        String colName = colNames[i];
+        String rawColName = rawColNames[i];
+        int sqlType = colTypes.get(colName);
+        String javaType = toJavaType(colName, sqlType);
         if (null == javaType) {
+          LOG.error("Cannot resolve SQL type " + sqlType);
           continue;
         } else {
-          if (!first) {
-            sb.append("    else");
-          }
-
-          sb.append("    if (\"" + serializeRawColName(rawColNames[i]) + "\".equals(__fieldName)) {\n");
-          sb.append("      this." + colNames[i] + " = (" + javaType
-              + ") __fieldVal;\n");
-          sb.append("    }\n");
-          first = false;
+          sb.append("    setters.put(\"" + serializeRawColName(rawColName) + "\", new FieldSetterCommand() {" + sep);
+          sb.append("      @Override" + sep);
+          sb.append("      public void setField(Object value) {" + sep);
+          sb.append("        " + colName + " = (" + javaType + ")value;" + sep);
+          sb.append("      }" + sep);
+          sb.append("    });" + sep);
         }
       }
+      sb.append("  }" + sep);
     }
-    sb.append("    else {\n");
-    sb.append("      throw new RuntimeException(");
-    sb.append("\"No such field: \" + __fieldName);\n");
-    sb.append("    }\n");
-    sb.append("  }\n");
-
+    sb.append("  public " + typeName + "() {" + sep);
     for (int i = 0; i < numberOfMethods; ++i) {
-      myGenerateSetField(columnTypes, colNames, rawColNames, sb, i, maxColumnsPerMethod);
+      sb.append("    init" + i + "();" + sep);
     }
+    sb.append("  }" + sep);
   }
-
   /**
    * Raw column name is a column name as it was created on database and we need to serialize it between
    * double quotes into java class that will be further complied with javac. Various databases supports
@@ -1184,7 +1176,7 @@ public class ClassWriter {
 
     sb.append("  public Map<String, Object> getFieldMap() {\n");
     sb.append("    Map<String, Object> __sqoop$field_map = "
-        + "new TreeMap<String, Object>();\n");
+        + "new HashMap<String, Object>();\n");
     if (numberOfMethods > 1) {
       for (int i = 0; i < numberOfMethods; ++i) {
         sb.append("    this.getFieldMap" + i + "(__sqoop$field_map);\n");
@@ -1934,7 +1926,7 @@ public class ClassWriter {
     sb.append("import java.util.Iterator;\n");
     sb.append("import java.util.List;\n");
     sb.append("import java.util.Map;\n");
-    sb.append("import java.util.TreeMap;\n");
+    sb.append("import java.util.HashMap;\n");
     sb.append("\n");
 
     String className = tableNameInfo.getShortClassForTable(tableName);
@@ -1944,7 +1936,12 @@ public class ClassWriter {
         + CLASS_WRITER_VERSION + ";\n");
     sb.append(
         "  public int getClassFormatVersion() { return PROTOCOL_VERSION; }\n");
+    sb.append("  public static interface FieldSetterCommand {");
+    sb.append("    void setField(Object value);");
+    sb.append("  }");
     sb.append("  protected ResultSet __cur_result_set;\n");
+    sb.append("  private Map<String, FieldSetterCommand> setters = new HashMap<String, FieldSetterCommand>();\n");
+    generateConstructorAndInitMethods(columnTypes, colNames, rawColNames, className, sb);
     generateFields(columnTypes, colNames, className, sb);
     generateEquals(columnTypes, colNames, className, sb);
     generateDbRead(columnTypes, colNames, sb);

http://git-wip-us.apache.org/repos/asf/sqoop/blob/28bbe4d4/src/java/org/apache/sqoop/orm/CompilationManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/orm/CompilationManager.java b/src/java/org/apache/sqoop/orm/CompilationManager.java
index ce165e8..0a2a87f 100644
--- a/src/java/org/apache/sqoop/orm/CompilationManager.java
+++ b/src/java/org/apache/sqoop/orm/CompilationManager.java
@@ -296,11 +296,15 @@ public class CompilationManager {
         // we only record the subdir parts in the zip entry.
         String fullPath = entry.getAbsolutePath();
         String chompedPath = fullPath.substring(baseDirName.length());
+        int indexOfDollarSign = chompedPath.indexOf("$");
+        String innerTypesChompedPath = chompedPath
+            .substring(0, indexOfDollarSign == -1 ? chompedPath.length() : indexOfDollarSign);
 
         boolean include = chompedPath.endsWith(".class")
-            && sources.contains(
+            && (sources.contains(
             chompedPath.substring(0, chompedPath.length() - ".class".length())
-            + ".java");
+                    + ".java")
+                || sources.contains(innerTypesChompedPath + ".java"));
 
         if (include) {
           // include this file.

http://git-wip-us.apache.org/repos/asf/sqoop/blob/28bbe4d4/src/test/com/cloudera/sqoop/orm/TestClassWriter.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/orm/TestClassWriter.java b/src/test/com/cloudera/sqoop/orm/TestClassWriter.java
index 498db73..10a0969 100644
--- a/src/test/com/cloudera/sqoop/orm/TestClassWriter.java
+++ b/src/test/com/cloudera/sqoop/orm/TestClassWriter.java
@@ -26,6 +26,7 @@ import java.lang.reflect.Method;
 import java.sql.Connection;
 import java.sql.Statement;
 import java.sql.SQLException;
+import java.util.Random;
 import java.util.jar.JarEntry;
 import java.util.jar.JarInputStream;
 
@@ -57,6 +58,9 @@ public class TestClassWriter extends TestCase {
 
   public static final Log LOG =
       LogFactory.getLog(TestClassWriter.class.getName());
+  private static final String WIDE_TABLE_NAME = "WIDETABLE";
+  private static final int WIDE_TABLE_COLUMN_COUNT = 800;
+  private static final int WIDE_TABLE_ROW_COUNT = 20_000;
 
   // instance variables populated during setUp, used during tests
   private HsqldbTestServer testServer;
@@ -122,12 +126,16 @@ public class TestClassWriter extends TestCase {
   static final String JAR_GEN_DIR = ImportJobTestCase.TEMP_BASE_DIR
       + "sqoop/test/jargen";
 
+  private File runGenerationTest(String[] argv, String classNameToCheck) {
+    return runGenerationTest(argv, classNameToCheck, HsqldbTestServer.getTableName());
+  }
+
   /**
    * Run a test to verify that we can generate code and it emits the output
    * files where we expect them.
    * @return
    */
-  private File runGenerationTest(String [] argv, String classNameToCheck) {
+  private File runGenerationTest(String[] argv, String classNameToCheck, String tableName) {
     File codeGenDirFile = new File(CODE_GEN_DIR);
     File classGenDirFile = new File(JAR_GEN_DIR);
 
@@ -140,7 +148,7 @@ public class TestClassWriter extends TestCase {
 
     CompilationManager compileMgr = new CompilationManager(options);
     ClassWriter writer = new ClassWriter(options, manager,
-        HsqldbTestServer.getTableName(), compileMgr);
+        tableName, compileMgr);
 
     try {
       writer.generate();
@@ -675,4 +683,55 @@ public class TestClassWriter extends TestCase {
     };
     runFailedGenerationTest(argv, HsqldbTestServer.getTableName());
   }
+
+  @Test(timeout = 10000)
+  public void testWideTableClassGeneration() throws Exception {
+    createWideTable();
+    options = new SqoopOptions(HsqldbTestServer.getDbUrl(), WIDE_TABLE_NAME);
+
+    // Set the option strings in an "argv" to redirect our srcdir and bindir.
+    String [] argv = {
+      "--bindir",
+      JAR_GEN_DIR,
+      "--outdir",
+      CODE_GEN_DIR,
+    };
+
+    File ormJarFile = runGenerationTest(argv, WIDE_TABLE_NAME, WIDE_TABLE_NAME);
+
+    ClassLoader prevClassLoader = ClassLoaderStack.addJarFile(ormJarFile.getCanonicalPath(),
+        WIDE_TABLE_NAME);
+    Class tableClass = Class.forName(WIDE_TABLE_NAME, true,
+        Thread.currentThread().getContextClassLoader());
+
+    Object instance = tableClass.newInstance();
+    Method setterMethod = tableClass.getMethod("setField", String.class, Object.class);
+    Random random = new Random(0);
+    for (int j = 0; j < WIDE_TABLE_ROW_COUNT; ++j) {
+      for (int i = 0; i < WIDE_TABLE_COLUMN_COUNT; ++i) {
+        setterMethod.invoke(instance, "INTFIELD" + i, random.nextInt());
+      }
+    }
+
+    if (null != prevClassLoader) {
+      ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
+    }
+  }
+
+  private void createWideTable() throws Exception {
+    try (Connection conn = testServer.getConnection(); Statement stmt = conn.createStatement();) {
+      stmt.executeUpdate("DROP TABLE \"" + WIDE_TABLE_NAME + "\" IF EXISTS");
+      StringBuilder sb = new StringBuilder("CREATE TABLE \"" + WIDE_TABLE_NAME + "\" (");
+      for (int i = 0; i < WIDE_TABLE_COLUMN_COUNT; ++i) {
+        sb.append("intField" + i + " INT");
+        if (i < WIDE_TABLE_COLUMN_COUNT - 1) {
+          sb.append(",");
+        } else {
+          sb.append(")");
+        }
+      }
+      stmt.executeUpdate(sb.toString());
+      conn.commit();
+    }
+  }
 }