You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by to...@apache.org on 2009/12/15 19:17:35 UTC

svn commit: r890928 - in /hadoop/mapreduce/trunk: ./ src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/

Author: tomwhite
Date: Tue Dec 15 18:17:35 2009
New Revision: 890928

URL: http://svn.apache.org/viewvc?rev=890928&view=rev
Log:
MAPREDUCE-1148. SQL identifiers are a superset of Java identifiers. Contributed by Aaron Kimball.

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/TableClassName.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/TestClassWriter.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=890928&r1=890927&r2=890928&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Tue Dec 15 18:17:35 2009
@@ -139,6 +139,9 @@
 
     MAPREDUCE-1251. c++ utils doesn't compile. (Eli Collins via tomwhite)
 
+    MAPREDUCE-1148. SQL identifiers are a superset of Java identifiers.
+    (Aaron Kimball via tomwhite)
+
 Release 0.21.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java?rev=890928&r1=890927&r2=890928&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java Tue Dec 15 18:17:35 2009
@@ -33,6 +33,7 @@
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.util.HashSet;
 import java.util.Map;
 
 import org.apache.commons.logging.Log;
@@ -40,14 +41,57 @@
 
 /**
  * Creates an ORM class to represent a table from a database
- *
- * 
- *
  */
 public class ClassWriter {
 
   public static final Log LOG = LogFactory.getLog(ClassWriter.class.getName());
 
+  // The following are keywords and cannot be used for class, method, or field
+  // names.
+  public static final HashSet<String> JAVA_RESERVED_WORDS;
+
+  static {
+    JAVA_RESERVED_WORDS = new HashSet<String>();
+
+    JAVA_RESERVED_WORDS.add("abstract");
+    JAVA_RESERVED_WORDS.add("else");
+    JAVA_RESERVED_WORDS.add("int");
+    JAVA_RESERVED_WORDS.add("strictfp");
+    JAVA_RESERVED_WORDS.add("assert");
+    JAVA_RESERVED_WORDS.add("enum");
+    JAVA_RESERVED_WORDS.add("interface");
+    JAVA_RESERVED_WORDS.add("super");
+    JAVA_RESERVED_WORDS.add("boolean");
+    JAVA_RESERVED_WORDS.add("extends");
+    JAVA_RESERVED_WORDS.add("long");
+    JAVA_RESERVED_WORDS.add("switch");
+    JAVA_RESERVED_WORDS.add("break");
+    JAVA_RESERVED_WORDS.add("false");
+    JAVA_RESERVED_WORDS.add("native");
+    JAVA_RESERVED_WORDS.add("synchronized");
+    JAVA_RESERVED_WORDS.add("byte");
+    JAVA_RESERVED_WORDS.add("final");
+    JAVA_RESERVED_WORDS.add("new");
+    JAVA_RESERVED_WORDS.add("this");
+    JAVA_RESERVED_WORDS.add("case");
+    JAVA_RESERVED_WORDS.add("finally");
+    JAVA_RESERVED_WORDS.add("null");
+    JAVA_RESERVED_WORDS.add("throw");
+    JAVA_RESERVED_WORDS.add("catch");
+    JAVA_RESERVED_WORDS.add("float");
+    JAVA_RESERVED_WORDS.add("package");
+    JAVA_RESERVED_WORDS.add("throws");
+    JAVA_RESERVED_WORDS.add("char");
+    JAVA_RESERVED_WORDS.add("for");
+    JAVA_RESERVED_WORDS.add("private");
+    JAVA_RESERVED_WORDS.add("transient");
+    JAVA_RESERVED_WORDS.add("class");
+    JAVA_RESERVED_WORDS.add("goto");
+    JAVA_RESERVED_WORDS.add("protected");
+    JAVA_RESERVED_WORDS.add("true");
+    JAVA_RESERVED_WORDS.add("const");
+  }
+
   /**
    * This version number is injected into all generated Java classes to denote
    * which version of the ClassWriter's output format was used to generate the
@@ -76,6 +120,87 @@
     this.compileManager = compMgr;
   }
 
+  /**
+   * Given some character that can't be in an identifier,
+   * try to map it to a string that can.
+   *
+   * @param c a character that can't be in a Java identifier
+   * @return a string of characters that can, or null if there's
+   * no good translation.
+   */
+  static String getIdentifierStrForChar(char c) {
+    if (Character.isJavaIdentifierPart(c)) {
+      return "" + c;
+    } else if (Character.isWhitespace(c)) {
+      // Eliminate whitespace.
+      return null;
+    } else {
+      // All other characters map to underscore.
+      return "_";
+    }
+  }
+
+  /**
+   * @param word a word to test.
+   * @return true if 'word' is reserved the in Java language.
+   */
+  private static boolean isReservedWord(String word) {
+    return JAVA_RESERVED_WORDS.contains(word);
+  }
+
+  /**
+   * Coerce a candidate name for an identifier into one which will
+   * definitely compile.
+   *
+   * Ensures that the returned identifier matches [A-Za-z_][A-Za-z0-9_]*
+   * and is not a reserved word.
+   *
+   * @param candidate A string we want to use as an identifier
+   * @return A string naming an identifier which compiles and is
+   *   similar to the candidate.
+   */
+  public static String toIdentifier(String candidate) {
+    StringBuilder sb = new StringBuilder();
+    boolean first = true;
+    for (char c : candidate.toCharArray()) {
+      if (Character.isJavaIdentifierStart(c) && first) {
+        // Ok for this to be the first character of the identifier.
+        sb.append(c);
+        first = false;
+      } else if (Character.isJavaIdentifierPart(c) && !first) {
+        // Ok for this character to be in the output identifier.
+        sb.append(c);
+      } else {
+        // We have a character in the original that can't be
+        // part of this identifier we're building.
+        // If it's just not allowed to be the first char, add a leading '_'.
+        // If we have a reasonable translation (e.g., '-' -> '_'), do that.
+        // Otherwise, drop it.
+        if (first && Character.isJavaIdentifierPart(c)
+            && !Character.isJavaIdentifierStart(c)) {
+          sb.append("_");
+          sb.append(c);
+          first = false;
+        } else {
+          // Try to map this to a different character or string.
+          // If we can't just give up.
+          String translated = getIdentifierStrForChar(c);
+          if (null != translated) {
+            sb.append(translated);
+            first = false;
+          }
+        }
+      }
+    }
+
+    String output = sb.toString();
+    if (isReservedWord(output)) {
+      // e.g., 'class' -> '_class';
+      return "_" + output;
+    }
+
+    return output;
+  }
 
   /**
    * @param javaType
@@ -593,8 +718,21 @@
       colNames = connManager.getColumnNames(tableName);
     }
 
+    // Translate all the column names into names that are safe to
+    // use as identifiers.
+    String [] cleanedColNames = new String[colNames.length];
+    for (int i = 0; i < colNames.length; i++) {
+      String col = colNames[i];
+      String identifier = toIdentifier(col);
+      cleanedColNames[i] = identifier;
+
+      // make sure the col->type mapping holds for the 
+      // new identifier name, too.
+      columnTypes.put(identifier, columnTypes.get(col));
+    }
+
     // Generate the Java code
-    StringBuilder sb = generateClassForColumns(columnTypes, colNames);
+    StringBuilder sb = generateClassForColumns(columnTypes, cleanedColNames);
 
     // Write this out to a file.
     String codeOutDir = options.getCodeOutputDir();
@@ -605,16 +743,18 @@
     String sourceFilename = className.replace('.', File.separatorChar) + ".java";
     String filename = codeOutDir + sourceFilename;
 
-    LOG.debug("Writing source file: " + filename);
-    LOG.debug("Table name: " + tableName);
-    StringBuilder sbColTypes = new StringBuilder();
-    for (String col : colNames) {
-      Integer colType = columnTypes.get(col);
-      sbColTypes.append(col + ":" + colType + ", ");
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Writing source file: " + filename);
+      LOG.debug("Table name: " + tableName);
+      StringBuilder sbColTypes = new StringBuilder();
+      for (String col : colNames) {
+        Integer colType = columnTypes.get(col);
+        sbColTypes.append(col + ":" + colType + ", ");
+      }
+      String colTypeStr = sbColTypes.toString();
+      LOG.debug("Columns: " + colTypeStr);
+      LOG.debug("sourceFilename is " + sourceFilename);
     }
-    String colTypeStr = sbColTypes.toString();
-    LOG.debug("Columns: " + colTypeStr);
-    LOG.debug("sourceFilename is " + sourceFilename);
 
     compileManager.addSourceFile(sourceFilename);
 

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/TableClassName.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/TableClassName.java?rev=890928&r1=890927&r2=890928&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/TableClassName.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/TableClassName.java Tue Dec 15 18:17:35 2009
@@ -90,7 +90,8 @@
     }
 
     // no specific class; no specific package.
-    return tableName;
+    // Just make sure it's a legal identifier.
+    return ClassWriter.toIdentifier(tableName);
   }
 
   /**

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/TestClassWriter.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/TestClassWriter.java?rev=890928&r1=890927&r2=890928&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/TestClassWriter.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/TestClassWriter.java Tue Dec 15 18:17:35 2009
@@ -21,6 +21,8 @@
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.sql.Connection;
+import java.sql.Statement;
 import java.sql.SQLException;
 import java.util.Enumeration;
 import java.util.jar.JarEntry;
@@ -267,5 +269,50 @@
 
     runGenerationTest(argv, OVERRIDE_PACKAGE_NAME + "." + HsqldbTestServer.getTableName());
   }
+
+
+  // Test the SQL identifier -> Java identifier conversion.
+  @Test
+  public void testIdentifierConversion() {
+    assertNull(ClassWriter.getIdentifierStrForChar(' '));
+    assertNull(ClassWriter.getIdentifierStrForChar('\t'));
+    assertNull(ClassWriter.getIdentifierStrForChar('\r'));
+    assertNull(ClassWriter.getIdentifierStrForChar('\n'));
+    assertEquals("x", ClassWriter.getIdentifierStrForChar('x'));
+    assertEquals("_", ClassWriter.getIdentifierStrForChar('-'));
+    assertEquals("_", ClassWriter.getIdentifierStrForChar('_'));
+
+    assertEquals("foo", ClassWriter.toIdentifier("foo"));
+    assertEquals("_class", ClassWriter.toIdentifier("class"));
+    assertEquals("_class", ClassWriter.toIdentifier("cla ss"));
+    assertEquals("_int", ClassWriter.toIdentifier("int"));
+    assertEquals("thisismanywords", ClassWriter.toIdentifier("this is many words"));
+    assertEquals("_9isLegalInSql", ClassWriter.toIdentifier("9isLegalInSql"));
+    assertEquals("___", ClassWriter.toIdentifier("___"));
+  }
+
+  @Test
+  public void testWeirdColumnNames() throws SQLException {
+    // Recreate the table with column names that aren't legal Java identifiers.
+    String tableName = HsqldbTestServer.getTableName();
+    Connection connection = testServer.getConnection();
+    Statement st = connection.createStatement();
+    st.executeUpdate("DROP TABLE " + tableName + " IF EXISTS");
+    st.executeUpdate("CREATE TABLE " + tableName + " (class INT, \"9field\" INT)");
+    st.executeUpdate("INSERT INTO " + tableName + " VALUES(42, 41)");
+    connection.commit();
+    connection.close();
+
+    String [] argv = {
+        "--bindir",
+        JAR_GEN_DIR,
+        "--outdir",
+        CODE_GEN_DIR,
+        "--package-name",
+        OVERRIDE_PACKAGE_NAME
+    };
+
+    runGenerationTest(argv, OVERRIDE_PACKAGE_NAME + "." + HsqldbTestServer.getTableName());
+  }
 }