You are viewing a plain-text version of this content; the hyperlink to the canonical version was not preserved in this copy.
Posted to commits@sqoop.apache.org by ar...@apache.org on 2011/11/23 00:30:11 UTC
svn commit: r1205233 - in /incubator/sqoop/trunk/src:
java/com/cloudera/sqoop/ java/org/apache/sqoop/ java/org/apache/sqoop/orm/
java/org/apache/sqoop/tool/ test/com/cloudera/sqoop/
Author: arvind
Date: Tue Nov 22 23:30:10 2011
New Revision: 1205233
URL: http://svn.apache.org/viewvc?rev=1205233&view=rev
Log:
SQOOP-362. Allow user to override type mapping when doing Avro import.
(Jarcec Cecho via Arvind Prabhakar)
Modified:
incubator/sqoop/trunk/src/java/com/cloudera/sqoop/SqoopOptions.java
incubator/sqoop/trunk/src/java/org/apache/sqoop/SqoopOptions.java
incubator/sqoop/trunk/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
incubator/sqoop/trunk/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
incubator/sqoop/trunk/src/java/org/apache/sqoop/tool/ImportTool.java
incubator/sqoop/trunk/src/test/com/cloudera/sqoop/TestAvroImport.java
Modified: incubator/sqoop/trunk/src/java/com/cloudera/sqoop/SqoopOptions.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/com/cloudera/sqoop/SqoopOptions.java?rev=1205233&r1=1205232&r2=1205233&view=diff
==============================================================================
--- incubator/sqoop/trunk/src/java/com/cloudera/sqoop/SqoopOptions.java (original)
+++ incubator/sqoop/trunk/src/java/com/cloudera/sqoop/SqoopOptions.java Tue Nov 22 23:30:10 2011
@@ -21,7 +21,7 @@ package com.cloudera.sqoop;
import org.apache.hadoop.conf.Configuration;
/**
- * Configurable state used by Sqoop tools.
+ * @deprecated
*/
public class SqoopOptions
extends org.apache.sqoop.SqoopOptions implements Cloneable {
@@ -90,6 +90,7 @@ public class SqoopOptions
/**
* {@inheritDoc}.
+ * @deprecated
*/
public static class InvalidOptionsException
extends org.apache.sqoop.SqoopOptions.InvalidOptionsException {
Modified: incubator/sqoop/trunk/src/java/org/apache/sqoop/SqoopOptions.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/SqoopOptions.java?rev=1205233&r1=1205232&r2=1205233&view=diff
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/SqoopOptions.java (original)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/SqoopOptions.java Tue Nov 22 23:30:10 2011
@@ -1011,7 +1011,7 @@ public class SqoopOptions implements Clo
parseColumnMapping(mapColumn, mapColumnHive);
}
- public void setMapColumn(String mapColumn) {
+ public void setMapColumnJava(String mapColumn) {
parseColumnMapping(mapColumn, mapColumnJava);
}
Modified: incubator/sqoop/trunk/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java?rev=1205233&r1=1205232&r2=1205233&view=diff
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java (original)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java Tue Nov 22 23:30:10 2011
@@ -30,6 +30,7 @@ import org.apache.avro.Schema.Type;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.manager.ConnManager;
+import java.util.Properties;
/**
* Creates an Avro schema to represent a table from a database.
@@ -57,7 +58,7 @@ public class AvroSchemaGenerator {
for (String columnName : columnNames) {
String cleanedCol = ClassWriter.toIdentifier(columnName);
int sqlType = columnTypes.get(cleanedCol);
- Schema avroSchema = toAvroSchema(sqlType);
+ Schema avroSchema = toAvroSchema(sqlType, columnName);
Field field = new Field(cleanedCol, avroSchema, null, null);
field.addProp("columnName", columnName);
field.addProp("sqlType", Integer.toString(sqlType));
@@ -112,13 +113,44 @@ public class AvroSchemaGenerator {
}
}
- public Schema toAvroSchema(int sqlType) {
- // All types are assumed nullabl;e make a union of the "true" type for
- // a column and NULL.
+ private Type toAvroType(String type) {
+ if(type.equalsIgnoreCase("INTEGER")) { return Type.INT; }
+ if(type.equalsIgnoreCase("LONG")) { return Type.LONG; }
+ if(type.equalsIgnoreCase("BOOLEAN")) { return Type.BOOLEAN; }
+ if(type.equalsIgnoreCase("FLOAT")) { return Type.FLOAT; }
+ if(type.equalsIgnoreCase("DOUBLE")) { return Type.DOUBLE; }
+ if(type.equalsIgnoreCase("STRING")) { return Type.STRING; }
+ if(type.equalsIgnoreCase("BYTES")) { return Type.BYTES; }
+
+ // Mapping was not found
+ throw new IllegalArgumentException("Cannot convert to AVRO type " + type);
+ }
+
+ /**
+ * Will create union, because each type is assumed to be nullable.
+ *
+ * @param sqlType Original SQL type (might be overridden by user)
+ * @param columnName Column name from the query
+ * @return Schema
+ */
+ public Schema toAvroSchema(int sqlType, String columnName) {
+ Properties mappingJava = options.getMapColumnJava();
+
+ // Try to apply any user specified mapping
+ Type targetType;
+ if(columnName != null && mappingJava.containsKey(columnName)) {
+ targetType = toAvroType((String)mappingJava.get(columnName));
+ } else {
+ targetType = toAvroType(sqlType);
+ }
+
List<Schema> childSchemas = new ArrayList<Schema>();
- childSchemas.add(Schema.create(toAvroType(sqlType)));
+ childSchemas.add(Schema.create(targetType));
childSchemas.add(Schema.create(Schema.Type.NULL));
return Schema.createUnion(childSchemas);
}
+ public Schema toAvroSchema(int sqlType) {
+ return toAvroSchema(sqlType, null);
+ }
}
Modified: incubator/sqoop/trunk/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/tool/BaseSqoopTool.java?rev=1205233&r1=1205232&r2=1205233&view=diff
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/tool/BaseSqoopTool.java (original)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/tool/BaseSqoopTool.java Tue Nov 22 23:30:10 2011
@@ -861,7 +861,7 @@ public abstract class BaseSqoopTool exte
}
if (in.hasOption(MAP_COLUMN_JAVA)) {
- out.setMapColumn(in.getOptionValue(MAP_COLUMN_JAVA));
+ out.setMapColumnJava(in.getOptionValue(MAP_COLUMN_JAVA));
}
if (!multiTable && in.hasOption(CLASS_NAME_ARG)) {
Modified: incubator/sqoop/trunk/src/java/org/apache/sqoop/tool/ImportTool.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/tool/ImportTool.java?rev=1205233&r1=1205232&r2=1205233&view=diff
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/tool/ImportTool.java (original)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/tool/ImportTool.java Tue Nov 22 23:30:10 2011
@@ -38,7 +38,6 @@ import org.apache.hadoop.util.StringUtil
import com.cloudera.sqoop.Sqoop;
import com.cloudera.sqoop.SqoopOptions;
-import com.cloudera.sqoop.SqoopOptions.FileLayout;
import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException;
import com.cloudera.sqoop.cli.RelatedOptions;
import com.cloudera.sqoop.cli.ToolOptions;
@@ -838,10 +837,6 @@ public class ImportTool extends com.clou
"MySQL direct export currently supports only text output format."
+ "Parameters --as-sequencefile and --as-avrodatafile are not "
+ "supported with --direct params in MySQL case.");
- } else if (!options.getMapColumnJava().isEmpty()
- && options.getFileLayout() == FileLayout.AvroDataFile) {
- throw new InvalidOptionsException(
- "Overriding column types is currently not supported with avro.");
}
}
Modified: incubator/sqoop/trunk/src/test/com/cloudera/sqoop/TestAvroImport.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/test/com/cloudera/sqoop/TestAvroImport.java?rev=1205233&r1=1205232&r2=1205233&view=diff
==============================================================================
--- incubator/sqoop/trunk/src/test/com/cloudera/sqoop/TestAvroImport.java (original)
+++ incubator/sqoop/trunk/src/test/com/cloudera/sqoop/TestAvroImport.java Tue Nov 22 23:30:10 2011
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import java.sql.SQLException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import org.apache.avro.Schema;
@@ -56,7 +57,8 @@ public class TestAvroImport extends Impo
*
* @return the argv as an array of strings.
*/
- protected String[] getOutputArgv(boolean includeHadoopFlags) {
+ protected String[] getOutputArgv(boolean includeHadoopFlags,
+ String[] extraArgs) {
ArrayList<String> args = new ArrayList<String>();
if (includeHadoopFlags) {
@@ -72,6 +74,9 @@ public class TestAvroImport extends Impo
args.add("--split-by");
args.add("INTFIELD1");
args.add("--as-avrodatafile");
+ if(extraArgs != null) {
+ args.addAll(Arrays.asList(extraArgs));
+ }
return args.toArray(new String[0]);
}
@@ -84,7 +89,7 @@ public class TestAvroImport extends Impo
"'s'", "'0102'", };
createTableWithColTypes(types, vals);
- runImport(getOutputArgv(true));
+ runImport(getOutputArgv(true, null));
Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
DataFileReader<GenericRecord> reader = read(outputFile);
@@ -115,6 +120,28 @@ public class TestAvroImport extends Impo
assertEquals((byte) 2, b.get(1));
}
+ public void testOverrideTypeMapping() throws IOException {
+ String [] types = { "INT" };
+ String [] vals = { "10" };
+ createTableWithColTypes(types, vals);
+
+ String [] extraArgs = { "--map-column-java", "DATA_COL0=String"};
+
+ runImport(getOutputArgv(true, extraArgs));
+
+ Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
+ DataFileReader<GenericRecord> reader = read(outputFile);
+ Schema schema = reader.getSchema();
+ assertEquals(Schema.Type.RECORD, schema.getType());
+ List<Field> fields = schema.getFields();
+ assertEquals(types.length, fields.size());
+
+ checkField(fields.get(0), "DATA_COL0", Schema.Type.STRING);
+
+ GenericRecord record1 = reader.next();
+ assertEquals("DATA_COL0", new Utf8("10"), record1.get("DATA_COL0"));
+ }
+
private void checkField(Field field, String name, Type type) {
assertEquals(name, field.name());
assertEquals(Schema.Type.UNION, field.schema().getType());
@@ -127,7 +154,7 @@ public class TestAvroImport extends Impo
String [] vals = { null };
createTableWithColTypes(types, vals);
- runImport(getOutputArgv(true));
+ runImport(getOutputArgv(true, null));
Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
DataFileReader<GenericRecord> reader = read(outputFile);