You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ja...@apache.org on 2015/09/15 16:59:11 UTC
sqoop git commit: SQOOP-1369: Avro export ignores --columns option
Repository: sqoop
Updated Branches:
refs/heads/trunk fee80ac01 -> 5fc7a680f
SQOOP-1369: Avro export ignores --columns option
(Paul Mazak via Jarek Jarcec Cecho)
Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/5fc7a680
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/5fc7a680
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/5fc7a680
Branch: refs/heads/trunk
Commit: 5fc7a680f854c4a03e9a581079172d453d1a059c
Parents: fee80ac
Author: Jarek Jarcec Cecho <ja...@apache.org>
Authored: Tue Sep 15 07:58:53 2015 -0700
Committer: Jarek Jarcec Cecho <ja...@apache.org>
Committed: Tue Sep 15 07:58:53 2015 -0700
----------------------------------------------------------------------
src/java/org/apache/sqoop/SqoopOptions.java | 11 ++
.../sqoop/manager/CatalogQueryManager.java | 2 +-
.../org/apache/sqoop/manager/OracleManager.java | 2 +-
.../org/apache/sqoop/manager/SqlManager.java | 21 ++-
.../apache/sqoop/mapreduce/JdbcExportJob.java | 12 +-
src/test/com/cloudera/sqoop/TestAvroExport.java | 22 +++
src/test/org/apache/sqoop/TestSqoopOptions.java | 10 ++
.../apache/sqoop/manager/TestSqlManager.java | 78 +++++++++++
.../sqoop/mapreduce/TestJdbcExportJob.java | 137 +++++++++++++++++++
9 files changed, 288 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5fc7a680/src/java/org/apache/sqoop/SqoopOptions.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/SqoopOptions.java b/src/java/org/apache/sqoop/SqoopOptions.java
index ace90fd..db92b30 100644
--- a/src/java/org/apache/sqoop/SqoopOptions.java
+++ b/src/java/org/apache/sqoop/SqoopOptions.java
@@ -1175,6 +1175,17 @@ public class SqoopOptions implements Cloneable {
}
}
+ public String getColumnNameCaseInsensitive(String col){
+ if (null != columns) {
+ for(String columnName : columns) {
+ if(columnName.equalsIgnoreCase(col)) {
+ return columnName;
+ }
+ }
+ }
+ return null;
+ }
+
public void setColumns(String [] cols) {
if (null == cols) {
this.columns = null;
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5fc7a680/src/java/org/apache/sqoop/manager/CatalogQueryManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/manager/CatalogQueryManager.java b/src/java/org/apache/sqoop/manager/CatalogQueryManager.java
index 4e063ed..7b2ee78 100644
--- a/src/java/org/apache/sqoop/manager/CatalogQueryManager.java
+++ b/src/java/org/apache/sqoop/manager/CatalogQueryManager.java
@@ -180,7 +180,7 @@ public abstract class CatalogQueryManager
}
}
- return columns.toArray(new String[columns.size()]);
+ return filterSpecifiedColumnNames(columns.toArray(new String[columns.size()]));
}
protected abstract String getPrimaryKeyQuery(String tableName);
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5fc7a680/src/java/org/apache/sqoop/manager/OracleManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/manager/OracleManager.java b/src/java/org/apache/sqoop/manager/OracleManager.java
index 69b613f..d088265 100644
--- a/src/java/org/apache/sqoop/manager/OracleManager.java
+++ b/src/java/org/apache/sqoop/manager/OracleManager.java
@@ -980,7 +980,7 @@ public class OracleManager
}
}
- return columns.toArray(new String[columns.size()]);
+ return filterSpecifiedColumnNames(columns.toArray(new String[columns.size()]));
}
@Override
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5fc7a680/src/java/org/apache/sqoop/manager/SqlManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/manager/SqlManager.java b/src/java/org/apache/sqoop/manager/SqlManager.java
index ead581d..768507b 100644
--- a/src/java/org/apache/sqoop/manager/SqlManager.java
+++ b/src/java/org/apache/sqoop/manager/SqlManager.java
@@ -113,7 +113,26 @@ public abstract class SqlManager
/** {@inheritDoc} */
public String[] getColumnNames(String tableName) {
String stmt = getColNamesQuery(tableName);
- return getColumnNamesForRawQuery(stmt);
+ return filterSpecifiedColumnNames(getColumnNamesForRawQuery(stmt));
+ }
+
+ /**
+ * Utilize the --columns option, if specified.
+ * @param columns
+ * @return the subset of columns which were specified by --columns option.
+ */
+ protected String[] filterSpecifiedColumnNames(String[] columns) {
+ if (options.getColumns() == null) {
+ return columns;
+ }
+ List<String> colNames = new ArrayList<String>();
+ for (String col : columns) {
+ String userColName = options.getColumnNameCaseInsensitive(col);
+ if (userColName != null) {
+ colNames.add(userColName);
+ }
+ }
+ return colNames.toArray(new String[colNames.size()]);
}
@Override
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5fc7a680/src/java/org/apache/sqoop/mapreduce/JdbcExportJob.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/JdbcExportJob.java b/src/java/org/apache/sqoop/mapreduce/JdbcExportJob.java
index 93d438a..78df33c 100644
--- a/src/java/org/apache/sqoop/mapreduce/JdbcExportJob.java
+++ b/src/java/org/apache/sqoop/mapreduce/JdbcExportJob.java
@@ -97,12 +97,16 @@ public class JdbcExportJob extends ExportJobBase {
columnTypeInts = connManager.getColumnTypesForProcedure(
options.getCall());
}
+ String[] specifiedColumns = options.getColumns();
MapWritable columnTypes = new MapWritable();
for (Map.Entry<String, Integer> e : columnTypeInts.entrySet()) {
- Text columnName = new Text(e.getKey());
- Text columnText = new Text(
- connManager.toJavaType(tableName, e.getKey(), e.getValue()));
- columnTypes.put(columnName, columnText);
+ String column = e.getKey();
+ column = (specifiedColumns == null) ? column : options.getColumnNameCaseInsensitive(column);
+ if (column != null) {
+ Text columnName = new Text(column);
+ Text columnType = new Text(connManager.toJavaType(tableName, column, e.getValue()));
+ columnTypes.put(columnName, columnType);
+ }
}
DefaultStringifier.store(job.getConfiguration(), columnTypes,
AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5fc7a680/src/test/com/cloudera/sqoop/TestAvroExport.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/TestAvroExport.java b/src/test/com/cloudera/sqoop/TestAvroExport.java
index 663828c..1d8f5df 100644
--- a/src/test/com/cloudera/sqoop/TestAvroExport.java
+++ b/src/test/com/cloudera/sqoop/TestAvroExport.java
@@ -404,4 +404,26 @@ public class TestAvroExport extends ExportJobTestCase {
}
}
+ public void testSpecifiedColumnsAsAvroFields() throws IOException, SQLException {
+ final int TOTAL_RECORDS = 10;
+ ColumnGenerator[] gens = new ColumnGenerator[] {
+ colGenerator(000, Schema.create(Schema.Type.INT), 100, "INTEGER"), //col0
+ colGenerator(111, Schema.create(Schema.Type.INT), 100, "INTEGER"), //col1
+ colGenerator(222, Schema.create(Schema.Type.INT), 100, "INTEGER"), //col2
+ colGenerator(333, Schema.create(Schema.Type.INT), 100, "INTEGER") //col3
+ };
+ createAvroFile(0, TOTAL_RECORDS, gens);
+ createTable(gens);
+ runExport(getArgv(true, 10, 10, newStrArray(null, "-m", "" + 1, "--columns", "id,msg,col1,col2")));
+ verifyExport(TOTAL_RECORDS);
+ assertColValForRowId(0, "col0", null);
+ assertColValForRowId(0, "col1", 111);
+ assertColValForRowId(0, "col2", 222);
+ assertColValForRowId(0, "col3", null);
+ assertColValForRowId(9, "col0", null);
+ assertColValForRowId(9, "col1", 111);
+ assertColValForRowId(9, "col2", 222);
+ assertColValForRowId(9, "col3", null);
+ }
+
}
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5fc7a680/src/test/org/apache/sqoop/TestSqoopOptions.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/sqoop/TestSqoopOptions.java b/src/test/org/apache/sqoop/TestSqoopOptions.java
index 87b340a..fdb8c8d 100644
--- a/src/test/org/apache/sqoop/TestSqoopOptions.java
+++ b/src/test/org/apache/sqoop/TestSqoopOptions.java
@@ -34,4 +34,14 @@ public class TestSqoopOptions extends TestCase {
}
}.testParseColumnMapping();
}
+
+ public void testColumnNameCaseInsensitive() {
+ SqoopOptions opts = new SqoopOptions();
+ opts.setColumns(new String[]{ "AAA", "bbb" });
+ assertEquals("AAA", opts.getColumnNameCaseInsensitive("aAa"));
+ assertEquals("bbb", opts.getColumnNameCaseInsensitive("BbB"));
+ assertEquals(null, opts.getColumnNameCaseInsensitive("notFound"));
+ opts.setColumns(null);
+ assertEquals(null, opts.getColumnNameCaseInsensitive("noColumns"));
+ }
}
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5fc7a680/src/test/org/apache/sqoop/manager/TestSqlManager.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/sqoop/manager/TestSqlManager.java b/src/test/org/apache/sqoop/manager/TestSqlManager.java
new file mode 100644
index 0000000..08413b0
--- /dev/null
+++ b/src/test/org/apache/sqoop/manager/TestSqlManager.java
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.sqoop.manager;
+
+import static org.junit.Assert.assertArrayEquals;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+
+import org.junit.Test;
+
+import com.cloudera.sqoop.SqoopOptions;
+
+import junit.framework.TestCase;
+
+/**
+ * Test methods of the generic SqlManager implementation.
+ */
+public class TestSqlManager extends TestCase {
+
+ @Test
+ public void testFilteringSpecifiedColumnNamesWhenNoneSpecified() {
+ SqoopOptions opts = new SqoopOptions();
+ SqlManager sqlManager = stubSqlManager(opts);
+ String[] allColumnsFromDbTable = { "aaa", "bbb", "ccc", "ddd" };
+ assertArrayEquals(new String[]{ "aaa", "bbb", "ccc", "ddd" }, sqlManager.filterSpecifiedColumnNames(allColumnsFromDbTable));
+ }
+
+ @Test
+ public void testFilteringSpecifiedColumnNamesWhenSubset() {
+ SqoopOptions opts = new SqoopOptions();
+ String[] cols = { "bbb", "ccc" };
+ opts.setColumns(cols);
+ SqlManager sqlManager = stubSqlManager(opts);
+ String[] allColumnsFromDbTable = { "aaa", "bbb", "ccc", "ddd" };
+ assertArrayEquals(new String[]{ "bbb", "ccc" }, sqlManager.filterSpecifiedColumnNames(allColumnsFromDbTable));
+ }
+
+ @Test
+ public void testFilteringSpecifiedColumnNamesUsesCaseFromArgumentNotDatabase() {
+ SqoopOptions opts = new SqoopOptions();
+ String[] cols = { "bbb", "ccc" };
+ opts.setColumns(cols);
+ SqlManager sqlManager = stubSqlManager(opts);
+ String[] allColumnsFromDbTable = { "AAA", "BBB", "CCC", "DDD" };
+ assertArrayEquals(new String[]{ "bbb", "ccc" }, sqlManager.filterSpecifiedColumnNames(allColumnsFromDbTable));
+ }
+
+ private SqlManager stubSqlManager(SqoopOptions opts) {
+ SqlManager sqlManager = new SqlManager(opts) {
+ @Override
+ public Connection getConnection() throws SQLException {
+ return null;
+ }
+ @Override
+ public String getDriverClass() {
+ return null;
+ }
+ };
+ return sqlManager;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5fc7a680/src/test/org/apache/sqoop/mapreduce/TestJdbcExportJob.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/sqoop/mapreduce/TestJdbcExportJob.java b/src/test/org/apache/sqoop/mapreduce/TestJdbcExportJob.java
new file mode 100644
index 0000000..19440ff
--- /dev/null
+++ b/src/test/org/apache/sqoop/mapreduce/TestJdbcExportJob.java
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.sqoop.mapreduce;
+
+import java.io.IOException;
+import java.sql.Types;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import static org.mockito.Matchers.anyInt;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import org.apache.hadoop.io.DefaultStringifier;
+import org.apache.hadoop.io.MapWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.sqoop.mapreduce.ExportJobBase.FileType;
+import org.junit.Test;
+
+import com.cloudera.sqoop.SqoopOptions;
+import com.cloudera.sqoop.manager.ConnManager;
+import com.cloudera.sqoop.manager.ExportJobContext;
+
+import junit.framework.TestCase;
+
+/**
+ * Test methods of the JdbcExportJob implementation.
+ */
+public class TestJdbcExportJob extends TestCase {
+
+ @Test
+ public void testAvroWithNoColumnsSpecified() throws Exception {
+ SqoopOptions opts = new SqoopOptions();
+ opts.setExportDir("myexportdir");
+ JdbcExportJob jdbcExportJob = stubJdbcExportJob(opts, FileType.AVRO_DATA_FILE);
+ Job job = new Job();
+ jdbcExportJob.configureInputFormat(job, null, null, null);
+ assertEquals(asSetOfText("Age", "Name", "Gender"), DefaultStringifier.load(job.getConfiguration(), AvroExportMapper.AVRO_COLUMN_TYPES_MAP, MapWritable.class).keySet());
+ }
+
+ @Test
+ public void testAvroWithAllColumnsSpecified() throws Exception {
+ SqoopOptions opts = new SqoopOptions();
+ opts.setExportDir("myexportdir");
+ String[] columns = { "Age", "Name", "Gender" };
+ opts.setColumns(columns);
+ JdbcExportJob jdbcExportJob = stubJdbcExportJob(opts, FileType.AVRO_DATA_FILE);
+ Job job = new Job();
+ jdbcExportJob.configureInputFormat(job, null, null, null);
+ assertEquals(asSetOfText("Age", "Name", "Gender"), DefaultStringifier.load(job.getConfiguration(), AvroExportMapper.AVRO_COLUMN_TYPES_MAP, MapWritable.class).keySet());
+ }
+
+ @Test
+ public void testAvroWithOneColumnSpecified() throws Exception {
+ SqoopOptions opts = new SqoopOptions();
+ opts.setExportDir("myexportdir");
+ String[] columns = { "Gender" };
+ opts.setColumns(columns);
+ JdbcExportJob jdbcExportJob = stubJdbcExportJob(opts, FileType.AVRO_DATA_FILE);
+ Job job = new Job();
+ jdbcExportJob.configureInputFormat(job, null, null, null);
+ assertEquals(asSetOfText("Gender"), DefaultStringifier.load(job.getConfiguration(), AvroExportMapper.AVRO_COLUMN_TYPES_MAP, MapWritable.class).keySet());
+ }
+
+ @Test
+ public void testAvroWithSomeColumnsSpecified() throws Exception {
+ SqoopOptions opts = new SqoopOptions();
+ opts.setExportDir("myexportdir");
+ String[] columns = { "Age", "Name" };
+ opts.setColumns(columns);
+ JdbcExportJob jdbcExportJob = stubJdbcExportJob(opts, FileType.AVRO_DATA_FILE);
+ Job job = new Job();
+ jdbcExportJob.configureInputFormat(job, null, null, null);
+ assertEquals(asSetOfText("Age", "Name"), DefaultStringifier.load(job.getConfiguration(), AvroExportMapper.AVRO_COLUMN_TYPES_MAP, MapWritable.class).keySet());
+ }
+
+ @Test
+ public void testAvroWithMoreColumnsSpecified() throws Exception {
+ SqoopOptions opts = new SqoopOptions();
+ opts.setExportDir("myexportdir");
+ String[] columns = { "Age", "Name", "Gender", "Address" };
+ opts.setColumns(columns);
+ JdbcExportJob jdbcExportJob = stubJdbcExportJob(opts, FileType.AVRO_DATA_FILE);
+ Job job = new Job();
+ jdbcExportJob.configureInputFormat(job, null, null, null);
+ assertEquals(asSetOfText("Age", "Name", "Gender"), DefaultStringifier.load(job.getConfiguration(), AvroExportMapper.AVRO_COLUMN_TYPES_MAP, MapWritable.class).keySet());
+ }
+
+ private JdbcExportJob stubJdbcExportJob(SqoopOptions opts, final FileType inputFileType) throws IOException {
+ ExportJobContext mockContext = mock(ExportJobContext.class);
+ when(mockContext.getOptions()).thenReturn(opts);
+ ConnManager mockConnManager = mock(ConnManager.class);
+ Map<String, Integer> columnTypeInts = new HashMap<String, Integer>();
+ columnTypeInts.put("Name", Types.VARCHAR);
+ columnTypeInts.put("Age", Types.SMALLINT);
+ columnTypeInts.put("Gender", Types.CHAR);
+ when(mockConnManager.getColumnTypes(anyString(), anyString())).thenReturn(columnTypeInts);
+ when(mockConnManager.toJavaType(anyString(), anyString(), anyInt())).thenReturn("String");
+ when(mockContext.getConnManager()).thenReturn(mockConnManager);
+ JdbcExportJob jdbcExportJob = new JdbcExportJob(mockContext) {
+ @Override
+ protected FileType getInputFileType() {
+ return inputFileType;
+ }
+ };
+ jdbcExportJob.options = opts;
+ return jdbcExportJob;
+ }
+
+ private Set<Text> asSetOfText(String... strings) {
+ Set<Text> setOfText = new HashSet<Text>();
+ for (String string : strings) {
+ setOfText.add(new Text(string));
+ }
+ return setOfText;
+ }
+}
\ No newline at end of file