You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by to...@apache.org on 2009/12/15 19:17:35 UTC
svn commit: r890928 - in /hadoop/mapreduce/trunk: ./
src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/
src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/
Author: tomwhite
Date: Tue Dec 15 18:17:35 2009
New Revision: 890928
URL: http://svn.apache.org/viewvc?rev=890928&view=rev
Log:
MAPREDUCE-1148. SQL identifiers are a superset of Java identifiers. Contributed by Aaron Kimball.
Modified:
hadoop/mapreduce/trunk/CHANGES.txt
hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java
hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/TableClassName.java
hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/TestClassWriter.java
Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=890928&r1=890927&r2=890928&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Tue Dec 15 18:17:35 2009
@@ -139,6 +139,9 @@
MAPREDUCE-1251. c++ utils doesn't compile. (Eli Collins via tomwhite)
+ MAPREDUCE-1148. SQL identifiers are a superset of Java identifiers.
+ (Aaron Kimball via tomwhite)
+
Release 0.21.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java?rev=890928&r1=890927&r2=890928&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java Tue Dec 15 18:17:35 2009
@@ -33,6 +33,7 @@
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
+import java.util.HashSet;
import java.util.Map;
import org.apache.commons.logging.Log;
@@ -40,14 +41,57 @@
/**
* Creates an ORM class to represent a table from a database
- *
- *
- *
*/
public class ClassWriter {
public static final Log LOG = LogFactory.getLog(ClassWriter.class.getName());
+ // The following are keywords and cannot be used for class, method, or field
+ // names.
+ public static final HashSet<String> JAVA_RESERVED_WORDS;
+
+ static {
+ JAVA_RESERVED_WORDS = new HashSet<String>();
+
+ JAVA_RESERVED_WORDS.add("abstract");
+ JAVA_RESERVED_WORDS.add("else");
+ JAVA_RESERVED_WORDS.add("int");
+ JAVA_RESERVED_WORDS.add("strictfp");
+ JAVA_RESERVED_WORDS.add("assert");
+ JAVA_RESERVED_WORDS.add("enum");
+ JAVA_RESERVED_WORDS.add("interface");
+ JAVA_RESERVED_WORDS.add("super");
+ JAVA_RESERVED_WORDS.add("boolean");
+ JAVA_RESERVED_WORDS.add("extends");
+ JAVA_RESERVED_WORDS.add("long");
+ JAVA_RESERVED_WORDS.add("switch");
+ JAVA_RESERVED_WORDS.add("break");
+ JAVA_RESERVED_WORDS.add("false");
+ JAVA_RESERVED_WORDS.add("native");
+ JAVA_RESERVED_WORDS.add("synchronized");
+ JAVA_RESERVED_WORDS.add("byte");
+ JAVA_RESERVED_WORDS.add("final");
+ JAVA_RESERVED_WORDS.add("new");
+ JAVA_RESERVED_WORDS.add("this");
+ JAVA_RESERVED_WORDS.add("case");
+ JAVA_RESERVED_WORDS.add("finally");
+ JAVA_RESERVED_WORDS.add("null");
+ JAVA_RESERVED_WORDS.add("throw");
+ JAVA_RESERVED_WORDS.add("catch");
+ JAVA_RESERVED_WORDS.add("float");
+ JAVA_RESERVED_WORDS.add("package");
+ JAVA_RESERVED_WORDS.add("throws");
+ JAVA_RESERVED_WORDS.add("char");
+ JAVA_RESERVED_WORDS.add("for");
+ JAVA_RESERVED_WORDS.add("private");
+ JAVA_RESERVED_WORDS.add("transient");
+ JAVA_RESERVED_WORDS.add("class");
+ JAVA_RESERVED_WORDS.add("goto");
+ JAVA_RESERVED_WORDS.add("protected");
+ JAVA_RESERVED_WORDS.add("true");
+ JAVA_RESERVED_WORDS.add("const");
+ }
+
/**
* This version number is injected into all generated Java classes to denote
* which version of the ClassWriter's output format was used to generate the
@@ -76,6 +120,87 @@
this.compileManager = compMgr;
}
+ /**
+ * Given some character that can't be in an identifier,
+ * try to map it to a string that can.
+ *
+ * @param c a character that can't be in a Java identifier
+ * @return a string of characters that can, or null if there's
+ * no good translation.
+ */
+ static String getIdentifierStrForChar(char c) {
+ if (Character.isJavaIdentifierPart(c)) {
+ return "" + c;
+ } else if (Character.isWhitespace(c)) {
+ // Eliminate whitespace.
+ return null;
+ } else {
+ // All other characters map to underscore.
+ return "_";
+ }
+ }
+
+ /**
+ * @param word a word to test.
+ * @return true if 'word' is reserved the in Java language.
+ */
+ private static boolean isReservedWord(String word) {
+ return JAVA_RESERVED_WORDS.contains(word);
+ }
+
+ /**
+ * Coerce a candidate name for an identifier into one which will
+ * definitely compile.
+ *
+ * Ensures that the returned identifier matches [A-Za-z_][A-Za-z0-9_]*
+ * and is not a reserved word.
+ *
+ * @param candidate A string we want to use as an identifier
+ * @return A string naming an identifier which compiles and is
+ * similar to the candidate.
+ */
+ public static String toIdentifier(String candidate) {
+ StringBuilder sb = new StringBuilder();
+ boolean first = true;
+ for (char c : candidate.toCharArray()) {
+ if (Character.isJavaIdentifierStart(c) && first) {
+ // Ok for this to be the first character of the identifier.
+ sb.append(c);
+ first = false;
+ } else if (Character.isJavaIdentifierPart(c) && !first) {
+ // Ok for this character to be in the output identifier.
+ sb.append(c);
+ } else {
+ // We have a character in the original that can't be
+ // part of this identifier we're building.
+ // If it's just not allowed to be the first char, add a leading '_'.
+ // If we have a reasonable translation (e.g., '-' -> '_'), do that.
+ // Otherwise, drop it.
+ if (first && Character.isJavaIdentifierPart(c)
+ && !Character.isJavaIdentifierStart(c)) {
+ sb.append("_");
+ sb.append(c);
+ first = false;
+ } else {
+ // Try to map this to a different character or string.
+ // If we can't just give up.
+ String translated = getIdentifierStrForChar(c);
+ if (null != translated) {
+ sb.append(translated);
+ first = false;
+ }
+ }
+ }
+ }
+
+ String output = sb.toString();
+ if (isReservedWord(output)) {
+ // e.g., 'class' -> '_class';
+ return "_" + output;
+ }
+
+ return output;
+ }
/**
* @param javaType
@@ -593,8 +718,21 @@
colNames = connManager.getColumnNames(tableName);
}
+ // Translate all the column names into names that are safe to
+ // use as identifiers.
+ String [] cleanedColNames = new String[colNames.length];
+ for (int i = 0; i < colNames.length; i++) {
+ String col = colNames[i];
+ String identifier = toIdentifier(col);
+ cleanedColNames[i] = identifier;
+
+ // make sure the col->type mapping holds for the
+ // new identifier name, too.
+ columnTypes.put(identifier, columnTypes.get(col));
+ }
+
// Generate the Java code
- StringBuilder sb = generateClassForColumns(columnTypes, colNames);
+ StringBuilder sb = generateClassForColumns(columnTypes, cleanedColNames);
// Write this out to a file.
String codeOutDir = options.getCodeOutputDir();
@@ -605,16 +743,18 @@
String sourceFilename = className.replace('.', File.separatorChar) + ".java";
String filename = codeOutDir + sourceFilename;
- LOG.debug("Writing source file: " + filename);
- LOG.debug("Table name: " + tableName);
- StringBuilder sbColTypes = new StringBuilder();
- for (String col : colNames) {
- Integer colType = columnTypes.get(col);
- sbColTypes.append(col + ":" + colType + ", ");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Writing source file: " + filename);
+ LOG.debug("Table name: " + tableName);
+ StringBuilder sbColTypes = new StringBuilder();
+ for (String col : colNames) {
+ Integer colType = columnTypes.get(col);
+ sbColTypes.append(col + ":" + colType + ", ");
+ }
+ String colTypeStr = sbColTypes.toString();
+ LOG.debug("Columns: " + colTypeStr);
+ LOG.debug("sourceFilename is " + sourceFilename);
}
- String colTypeStr = sbColTypes.toString();
- LOG.debug("Columns: " + colTypeStr);
- LOG.debug("sourceFilename is " + sourceFilename);
compileManager.addSourceFile(sourceFilename);
Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/TableClassName.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/TableClassName.java?rev=890928&r1=890927&r2=890928&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/TableClassName.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/TableClassName.java Tue Dec 15 18:17:35 2009
@@ -90,7 +90,8 @@
}
// no specific class; no specific package.
- return tableName;
+ // Just make sure it's a legal identifier.
+ return ClassWriter.toIdentifier(tableName);
}
/**
Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/TestClassWriter.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/TestClassWriter.java?rev=890928&r1=890927&r2=890928&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/TestClassWriter.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/TestClassWriter.java Tue Dec 15 18:17:35 2009
@@ -21,6 +21,8 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.sql.Connection;
+import java.sql.Statement;
import java.sql.SQLException;
import java.util.Enumeration;
import java.util.jar.JarEntry;
@@ -267,5 +269,50 @@
runGenerationTest(argv, OVERRIDE_PACKAGE_NAME + "." + HsqldbTestServer.getTableName());
}
+
+
+ // Test the SQL identifier -> Java identifier conversion.
+ @Test
+ public void testIdentifierConversion() {
+ assertNull(ClassWriter.getIdentifierStrForChar(' '));
+ assertNull(ClassWriter.getIdentifierStrForChar('\t'));
+ assertNull(ClassWriter.getIdentifierStrForChar('\r'));
+ assertNull(ClassWriter.getIdentifierStrForChar('\n'));
+ assertEquals("x", ClassWriter.getIdentifierStrForChar('x'));
+ assertEquals("_", ClassWriter.getIdentifierStrForChar('-'));
+ assertEquals("_", ClassWriter.getIdentifierStrForChar('_'));
+
+ assertEquals("foo", ClassWriter.toIdentifier("foo"));
+ assertEquals("_class", ClassWriter.toIdentifier("class"));
+ assertEquals("_class", ClassWriter.toIdentifier("cla ss"));
+ assertEquals("_int", ClassWriter.toIdentifier("int"));
+ assertEquals("thisismanywords", ClassWriter.toIdentifier("this is many words"));
+ assertEquals("_9isLegalInSql", ClassWriter.toIdentifier("9isLegalInSql"));
+ assertEquals("___", ClassWriter.toIdentifier("___"));
+ }
+
+ @Test
+ public void testWeirdColumnNames() throws SQLException {
+ // Recreate the table with column names that aren't legal Java identifiers.
+ String tableName = HsqldbTestServer.getTableName();
+ Connection connection = testServer.getConnection();
+ Statement st = connection.createStatement();
+ st.executeUpdate("DROP TABLE " + tableName + " IF EXISTS");
+ st.executeUpdate("CREATE TABLE " + tableName + " (class INT, \"9field\" INT)");
+ st.executeUpdate("INSERT INTO " + tableName + " VALUES(42, 41)");
+ connection.commit();
+ connection.close();
+
+ String [] argv = {
+ "--bindir",
+ JAR_GEN_DIR,
+ "--outdir",
+ CODE_GEN_DIR,
+ "--package-name",
+ OVERRIDE_PACKAGE_NAME
+ };
+
+ runGenerationTest(argv, OVERRIDE_PACKAGE_NAME + "." + HsqldbTestServer.getTableName());
+ }
}