You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by to...@apache.org on 2009/08/12 16:19:06 UTC

svn commit: r803529 - in /hadoop/mapreduce/trunk: ./ src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/ src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/ src/contrib/sqoop/src/test/org/apac...

Author: tomwhite
Date: Wed Aug 12 14:19:06 2009
New Revision: 803529

URL: http://svn.apache.org/viewvc?rev=803529&view=rev
Log:
MAPREDUCE-789. Oracle support for Sqoop. Contributed by Aaron Kimball.

Added:
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/OracleManager.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/OracleManagerTest.java
Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ConnFactory.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/LocalMySQLManager.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/AllTests.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=803529&r1=803528&r2=803529&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Wed Aug 12 14:19:06 2009
@@ -187,6 +187,8 @@
     MAPREDUCE-372. Moves ChainMapper/Reducer to the new API.
     (Amareshwari Sriramadasu via ddas)
 
+    MAPREDUCE-789. Oracle support for Sqoop. (Aaron Kimball via tomwhite)
+
   BUG FIXES
     MAPREDUCE-703. Sqoop requires dependency on hsqldb in ivy.
     (Aaron Kimball via matei)

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ConnFactory.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ConnFactory.java?rev=803529&r1=803528&r2=803529&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ConnFactory.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ConnFactory.java Wed Aug 12 14:19:06 2009
@@ -23,6 +23,7 @@
 import org.apache.hadoop.sqoop.manager.HsqldbManager;
 import org.apache.hadoop.sqoop.manager.LocalMySQLManager;
 import org.apache.hadoop.sqoop.manager.MySQLManager;
+import org.apache.hadoop.sqoop.manager.OracleManager;
 
 import java.io.IOException;
 
@@ -78,6 +79,8 @@
       }
     } else if (scheme.equals("jdbc:hsqldb:hsql:")) {
       return new HsqldbManager(opts);
+    } else if (scheme.startsWith("jdbc:oracle:")) {
+      return new OracleManager(opts);
     } else {
       throw new IOException("Unknown connection scheme: " + scheme);
     }

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/LocalMySQLManager.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/LocalMySQLManager.java?rev=803529&r1=803528&r2=803529&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/LocalMySQLManager.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/LocalMySQLManager.java Wed Aug 12 14:19:06 2009
@@ -30,7 +30,6 @@
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.nio.CharBuffer;
-import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.List;

Added: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/OracleManager.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/OracleManager.java?rev=803529&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/OracleManager.java (added)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/OracleManager.java Wed Aug 12 14:19:06 2009
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.manager;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.sqoop.ImportOptions;
+import org.apache.hadoop.sqoop.util.ImportError;
+
+/**
+ * Manages connections to Oracle databases.
+ * Requires the Oracle JDBC driver.
+ */
+public class OracleManager extends GenericJdbcManager {
+
+  public static final Log LOG = LogFactory.getLog(OracleManager.class.getName());
+
+  // driver class to ensure is loaded when making db connection.
+  private static final String DRIVER_CLASS = "oracle.jdbc.OracleDriver";
+
+  public OracleManager(final ImportOptions opts) {
+    super(DRIVER_CLASS, opts);
+  }
+
+  protected String getColNamesQuery(String tableName) {
+    // SqlManager uses "tableName AS t" which doesn't work in Oracle.
+    return "SELECT t.* FROM " + tableName + " t";
+  }
+
+  /**
+   * Create a connection to the database; usually used only from within
+   * getConnection(), which enforces a singleton guarantee around the
+   * Connection object.
+   *
+   * Oracle-specific driver uses READ_COMMITTED which is the weakest
+   * semantics Oracle supports.
+   */
+  protected Connection makeConnection() throws SQLException {
+
+    Connection connection;
+    String driverClass = getDriverClass();
+
+    try {
+      Class.forName(driverClass);
+    } catch (ClassNotFoundException cnfe) {
+      throw new RuntimeException("Could not load db driver class: " + driverClass);
+    }
+
+    String username = options.getUsername();
+    String password = options.getPassword();
+    if (null == username) {
+      connection = DriverManager.getConnection(options.getConnectString());
+    } else {
+      connection = DriverManager.getConnection(options.getConnectString(), username, password);
+    }
+
+    // We only use this for metadata queries. Loosest semantics are okay.
+    connection.setTransactionIsolation(Connection.TRANSACTION_READ_COMMITTED);
+
+    return connection;
+  }
+}
+

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/AllTests.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/AllTests.java?rev=803529&r1=803528&r2=803529&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/AllTests.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/AllTests.java Wed Aug 12 14:19:06 2009
@@ -23,6 +23,7 @@
 import org.apache.hadoop.sqoop.lib.TestRecordParser;
 import org.apache.hadoop.sqoop.manager.LocalMySQLTest;
 import org.apache.hadoop.sqoop.manager.MySQLAuthTest;
+import org.apache.hadoop.sqoop.manager.OracleManagerTest;
 import org.apache.hadoop.sqoop.manager.TestHsqldbManager;
 import org.apache.hadoop.sqoop.manager.TestSqlManager;
 import org.apache.hadoop.sqoop.orm.TestClassWriter;
@@ -58,6 +59,7 @@
     suite.addTestSuite(TestFieldFormatter.class);
     suite.addTestSuite(TestImportOptions.class);
     suite.addTestSuite(TestParseMethods.class);
+    suite.addTestSuite(OracleManagerTest.class);
 
     return suite;
   }

Added: hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/OracleManagerTest.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/OracleManagerTest.java?rev=803529&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/OracleManagerTest.java (added)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/OracleManagerTest.java Wed Aug 12 14:19:06 2009
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.manager;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
+import java.io.File;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.sqoop.ImportOptions;
+import org.apache.hadoop.sqoop.testutil.ImportJobTestCase;
+import org.apache.hadoop.sqoop.util.FileListing;
+
+/**
+ * Test the OracleManager implementation.
+ *
+ * This uses JDBC to import data from an Oracle database into HDFS.
+ *
+ * Since this requires an Oracle installation on your local machine to use,
+ * this class is named in such a way that Hadoop's default QA process does
+ * not run it. You need to run this manually with -Dtestcase=OracleManagerTest.
+ *
+ * You need to put Oracle's JDBC driver library (ojdbc6_g.jar) in a location
+ * where Hadoop will be able to access it (since this library cannot be checked
+ * into Apache's tree for licensing reasons).
+ *
+ * To set up your test environment:
+ *   Install Oracle Express Edition 10.2.0.
+ *   Create a database user named SQOOPTEST
+ *   Set the user's password to '12345'
+ *   Grant the user the CREATE TABLE privilege
+ */
+public class OracleManagerTest extends ImportJobTestCase {
+
+  public static final Log LOG = LogFactory.getLog(OracleManagerTest.class.getName());
+
+  static final String ORACLE_DATABASE_NAME = "xe"; // Express edition hardcoded name.
+  static final String TABLE_NAME = "EMPLOYEES";
+  static final String CONNECT_STRING = "jdbc:oracle:thin:@//localhost/" + ORACLE_DATABASE_NAME;
+  static final String ORACLE_USER_NAME = "SQOOPTEST";
+  static final String ORACLE_USER_PASS = "12345";
+
+  // instance variables populated during setUp, used during tests
+  private OracleManager manager;
+
+  @Before
+  public void setUp() {
+    ImportOptions options = new ImportOptions(CONNECT_STRING, TABLE_NAME);
+    options.setUsername(ORACLE_USER_NAME);
+    options.setPassword(ORACLE_USER_PASS);
+
+    manager = new OracleManager(options);
+
+    Connection connection = null;
+    Statement st = null;
+
+    try {
+      connection = manager.getConnection();
+      connection.setAutoCommit(false);
+      st = connection.createStatement();
+
+      // create the database table and populate it with data. 
+      st.executeUpdate("BEGIN EXECUTE IMMEDIATE 'DROP TABLE " + TABLE_NAME + "'; "
+          + "exception when others then null; end;");
+      st.executeUpdate("CREATE TABLE " + TABLE_NAME + " ("
+          + "id INT NOT NULL, "
+          + "name VARCHAR2(24) NOT NULL, "
+          + "start_date DATE, "
+          + "salary FLOAT, "
+          + "dept VARCHAR2(32), "
+          + "PRIMARY KEY (id))");
+
+      st.executeUpdate("INSERT INTO " + TABLE_NAME + " VALUES("
+          + "1,'Aaron',to_date('2009-05-14','yyyy-mm-dd'),1000000.00,'engineering')");
+      st.executeUpdate("INSERT INTO " + TABLE_NAME + " VALUES("
+          + "2,'Bob',to_date('2009-04-20','yyyy-mm-dd'),400.00,'sales')");
+      st.executeUpdate("INSERT INTO " + TABLE_NAME + " VALUES("
+          + "3,'Fred',to_date('2009-01-23','yyyy-mm-dd'),15.00,'marketing')");
+      connection.commit();
+    } catch (SQLException sqlE) {
+      LOG.error("Encountered SQL Exception: " + sqlE);
+      sqlE.printStackTrace();
+      fail("SQLException when running test setUp(): " + sqlE);
+    } finally {
+      try {
+        if (null != st) {
+          st.close();
+        }
+
+        if (null != connection) {
+          connection.close();
+        }
+      } catch (SQLException sqlE) {
+        LOG.warn("Got SQLException when closing connection: " + sqlE);
+      }
+    }
+  }
+
+  @After
+  public void tearDown() {
+    try {
+      manager.close();
+    } catch (SQLException sqlE) {
+      LOG.error("Got SQLException: " + sqlE.toString());
+      fail("Got SQLException: " + sqlE.toString());
+    }
+  }
+
+  private String [] getArgv() {
+    ArrayList<String> args = new ArrayList<String>();
+
+    args.add("-D");
+    args.add("fs.default.name=file:///");
+    args.add("-D");
+    args.add("mapred.job.tracker=local");
+
+    args.add("--table");
+    args.add(TABLE_NAME);
+    args.add("--warehouse-dir");
+    args.add(getWarehouseDir());
+    args.add("--connect");
+    args.add(CONNECT_STRING);
+    args.add("--username");
+    args.add(ORACLE_USER_NAME);
+    args.add("--password");
+    args.add(ORACLE_USER_PASS);
+
+    return args.toArray(new String[0]);
+  }
+
+  private void runOracleTest(String [] expectedResults) throws IOException {
+
+    Path warehousePath = new Path(this.getWarehouseDir());
+    Path tablePath = new Path(warehousePath, TABLE_NAME);
+    Path filePath = new Path(tablePath, "part-00000");
+
+    File tableFile = new File(tablePath.toString());
+    if (tableFile.exists() && tableFile.isDirectory()) {
+      // remove the directory before running the import.
+      FileListing.recursiveDeleteDir(tableFile);
+    }
+
+    String [] argv = getArgv();
+    try {
+      runImport(argv);
+    } catch (IOException ioe) {
+      LOG.error("Got IOException during import: " + ioe.toString());
+      ioe.printStackTrace();
+      fail(ioe.toString());
+    }
+
+    File f = new File(filePath.toString());
+    assertTrue("Could not find imported data file", f.exists());
+    BufferedReader r = null;
+    try {
+      // Read through the file and make sure it's all there.
+      r = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
+      for (String expectedLine : expectedResults) {
+        assertEquals(expectedLine, r.readLine());
+      }
+    } catch (IOException ioe) {
+      LOG.error("Got IOException verifying results: " + ioe.toString());
+      ioe.printStackTrace();
+      fail(ioe.toString());
+    } finally {
+      IOUtils.closeStream(r);
+    }
+  }
+
+  @Test
+  public void testOracleImport() throws IOException {
+    // no quoting of strings allowed.
+    // NOTE: Oracle JDBC 11.1 drivers auto-cast SQL DATE to java.sql.Timestamp.
+    // Even if you define your columns as DATE in Oracle, they may still contain
+    // time information, so the JDBC drivers lie to us and will never tell us we have
+    // a strict DATE type. Thus we include HH:MM:SS.mmmmm below.
+    // See http://www.oracle.com/technology/tech/java/sqlj_jdbc/htdocs/jdbc_faq.html#08_01
+    String [] expectedResults = {
+        "1,Aaron,2009-05-14 00:00:00.0,1000000,engineering",
+        "2,Bob,2009-04-20 00:00:00.0,400,sales",
+        "3,Fred,2009-01-23 00:00:00.0,15,marketing"
+    };
+
+    runOracleTest(expectedResults);
+  }
+}