Posted to common-commits@hadoop.apache.org by to...@apache.org on 2009/05/26 12:29:40 UTC

svn commit: r778646 [1/3] - in /hadoop/core/trunk: ./ src/contrib/sqoop/ src/contrib/sqoop/ivy/ src/contrib/sqoop/src/ src/contrib/sqoop/src/java/ src/contrib/sqoop/src/java/org/ src/contrib/sqoop/src/java/org/apache/ src/contrib/sqoop/src/java/org/apa...

Author: tomwhite
Date: Tue May 26 10:29:38 2009
New Revision: 778646

URL: http://svn.apache.org/viewvc?rev=778646&view=rev
Log:
HADOOP-5815. Sqoop: A database import tool for Hadoop. Contributed by Aaron Kimball.

Added:
    hadoop/core/trunk/src/contrib/sqoop/
    hadoop/core/trunk/src/contrib/sqoop/build.xml
    hadoop/core/trunk/src/contrib/sqoop/ivy/
    hadoop/core/trunk/src/contrib/sqoop/ivy.xml
    hadoop/core/trunk/src/contrib/sqoop/ivy/libraries.properties
    hadoop/core/trunk/src/contrib/sqoop/readme.html
    hadoop/core/trunk/src/contrib/sqoop/src/
    hadoop/core/trunk/src/contrib/sqoop/src/java/
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ConnFactory.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ImportOptions.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/Sqoop.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/BigDecimalSerializer.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/JdbcWritableBridge.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/ConnManager.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/GenericJdbcManager.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/HsqldbManager.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/MySQLManager.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/SqlManager.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/mapred/
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/mapred/ImportJob.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/mapred/TextImportMapper.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/CompilationManager.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/util/
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/util/ClassLoaderStack.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/util/FileListing.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/util/ImportError.java
    hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/util/ResultSetPrinter.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/AllTests.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/TestAllTables.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/TestColumnTypes.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/TestMultiCols.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/TestOrderBy.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/TestHsqldbManager.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/TestSqlManager.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/orm/TestClassWriter.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/testutil/
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/testutil/DirUtil.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/testutil/HsqldbTestServer.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/testutil/ImportJobTestCase.java
    hadoop/core/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/testutil/SeqFileReader.java
Modified:
    hadoop/core/trunk/CHANGES.txt

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=778646&r1=778645&r2=778646&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Tue May 26 10:29:38 2009
@@ -125,6 +125,9 @@
     HADOOP-4829. Allow FileSystem shutdown hook to be disabled.
     (Todd Lipcon via tomwhite)
 
+    HADOOP-5815. Sqoop: A database import tool for Hadoop.
+    (Aaron Kimball via tomwhite)
+
   IMPROVEMENTS
 
     HADOOP-4565. Added CombineFileInputFormat to use data locality information

Added: hadoop/core/trunk/src/contrib/sqoop/build.xml
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/build.xml?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/build.xml (added)
+++ hadoop/core/trunk/src/contrib/sqoop/build.xml Tue May 26 10:29:38 2009
@@ -0,0 +1,91 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<!--
+Before you can run these subtargets directly, you need
+to call at top-level: ant deploy-contrib compile-core-test
+-->
+<project name="sqoop" default="jar">
+
+  <import file="../build-contrib.xml"/>
+  <property environment="env"/>
+
+  <!-- ================================================================== -->
+  <!-- Run unit tests                                                     -->
+  <!-- Override with our own version so we can set hadoop.alt.classpath   -->
+  <!-- ================================================================== -->
+  <target name="test" depends="compile-test, compile" if="test.available">
+    <echo message="contrib: ${name}"/>
+    <delete dir="${hadoop.log.dir}"/>
+    <mkdir dir="${hadoop.log.dir}"/>
+    <delete dir="${build.test}/data"/>
+    <mkdir dir="${build.test}/data" />
+    <junit
+      printsummary="yes" showoutput="${test.output}"
+      haltonfailure="no" fork="yes" maxmemory="256m"
+      errorProperty="tests.failed" failureProperty="tests.failed"
+      timeout="${test.timeout}"
+      dir="${build.test}/data">
+
+      <sysproperty key="test.build.data" value="${build.test}/data"/>
+      <sysproperty key="build.test" value="${build.test}"/>
+      <sysproperty key="contrib.name" value="${name}"/>
+
+      <!--
+           Added property needed to use the .class files for compilation
+           instead of depending on hadoop-*-core.jar
+      -->
+      <sysproperty key="hadoop.alt.classpath"
+        value="${hadoop.root}/build/classes" />
+
+      <!-- requires fork=yes for:
+        relative File paths to use the specified user.dir
+        classpath to use build/contrib/*.jar
+      -->
+      <sysproperty key="user.dir" value="${build.test}/data"/>
+
+      <!-- Setting the user.dir property is actually meaningless as it
+          is read-only in the Linux Sun JDK. Provide an alternate sysprop
+          to specify where generated code should go.
+      -->
+      <sysproperty key="sqoop.src.dir" value="${build.test}/data"/>
+
+      <sysproperty key="fs.default.name" value="${fs.default.name}"/>
+      <sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
+      <sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/>
+
+      <!-- tools.jar from Sun JDK also required to invoke javac. -->
+      <classpath>
+        <path refid="test.classpath"/>
+        <path refid="contrib-classpath"/>
+        <pathelement path="${env.JAVA_HOME}/lib/tools.jar" />
+      </classpath>
+      <formatter type="${test.junit.output.format}" />
+      <batchtest todir="${build.test}" unless="testcase">
+        <fileset dir="${src.test}"
+                 includes="**/Test*.java" excludes="**/${test.exclude}.java" />
+      </batchtest>
+      <batchtest todir="${build.test}" if="testcase">
+        <fileset dir="${src.test}" includes="**/${testcase}.java"/>
+      </batchtest>
+    </junit>
+    <fail if="tests.failed">Tests failed!</fail>
+  </target>
+
+</project>

Added: hadoop/core/trunk/src/contrib/sqoop/ivy.xml
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/ivy.xml?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/ivy.xml (added)
+++ hadoop/core/trunk/src/contrib/sqoop/ivy.xml Tue May 26 10:29:38 2009
@@ -0,0 +1,60 @@
+<?xml version="1.0" ?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+    
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<ivy-module version="1.0">
+  <info organisation="org.apache.hadoop" module="${ant.project.name}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
+    <description>
+        Apache Hadoop
+    </description>
+  </info>
+  <configurations defaultconfmapping="default">
+    <!--these match the Maven configurations-->
+    <conf name="default" extends="master,runtime"/>
+    <conf name="master" description="contains the artifact but no dependencies"/>
+    <conf name="runtime" description="runtime but not the artifact" />
+
+    <conf name="common" visibility="private"
+      extends="runtime"
+      description="artifacts needed to compile/test the application"/>
+    <conf name="test" visibility="private" extends="runtime"/>
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+  <dependencies>
+    <dependency org="commons-logging"
+      name="commons-logging"
+      rev="${commons-logging.version}"
+      conf="common->default"/>
+    <dependency org="commons-httpclient"
+      name="commons-httpclient"
+      rev="${commons-httpclient.version}"
+      conf="common->default"/>
+    <dependency org="junit"
+      name="junit"
+      rev="${junit.version}"
+      conf="common->default"/>
+    <dependency org="log4j"
+      name="log4j"
+      rev="${log4j.version}"
+      conf="common->master"/>
+    </dependencies>
+</ivy-module>

Added: hadoop/core/trunk/src/contrib/sqoop/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/ivy/libraries.properties?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/ivy/libraries.properties (added)
+++ hadoop/core/trunk/src/contrib/sqoop/ivy/libraries.properties Tue May 26 10:29:38 2009
@@ -0,0 +1,21 @@
+#   Licensed to the Apache Software Foundation (ASF) under one or more
+#   contributor license agreements.  See the NOTICE file distributed with
+#   this work for additional information regarding copyright ownership.
+#   The ASF licenses this file to You under the Apache License, Version 2.0
+#   (the "License"); you may not use this file except in compliance with
+#   the License.  You may obtain a copy of the License at
+#  
+#       http://www.apache.org/licenses/LICENSE-2.0
+#    
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+#This properties file lists the versions of the various artifacts used by sqoop.
+#It drives ivy and the generation of a maven POM
+
+#Please list the dependencies name with version if they are different from the ones
+#listed in the global libraries.properties file (in alphabetical order)
+

Added: hadoop/core/trunk/src/contrib/sqoop/readme.html
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/readme.html?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/readme.html (added)
+++ hadoop/core/trunk/src/contrib/sqoop/readme.html Tue May 26 10:29:38 2009
@@ -0,0 +1,186 @@
+<html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+    
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<head>
+<title>Sqoop User's Guide</title>
+</head>
+<body>
+    <h1><a name="SqoopUsersGuide-Sqoop"></a>Sqoop</h1>
+
+<h2><a name="SqoopUsersGuide-Overview"></a>Overview</h2>
+
+<p>Sqoop is a tool designed to help users import data from existing relational databases into their Hadoop clusters. Sqoop uses JDBC to connect to a database, examine the schema for tables, and auto-generate the necessary classes to import data into HDFS. It then instantiates a MapReduce job to read the table from the database via the DBInputFormat (JDBC-based InputFormat). The table is read into a set of files written to HDFS. Both SequenceFile and text-based targets are supported.</p>
+
+<p>Longer term, Sqoop will support automatic connectivity to Hive, with the ability to load data files directly into the Hive warehouse directory, and also to inject the appropriate table definition into the metastore.</p>
+
+<h2><a name="SqoopUsersGuide-GettingStarted"></a>Getting Started</h2>
+
+<p><b>Getting Sqoop</b> Sqoop is distributed as a "contrib" jar with Hadoop. It is built in the <tt>contrib/sqoop/</tt> directory.</p>
+
+<p>You can run Sqoop by running:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop (options)
+</pre>
+</div></div>
+
+
+<p>This does nothing of interest without any options. The <tt>&#45;&#45;help</tt> option displays the full usage instructions.</p>
+
+<h3><a name="SqoopUsersGuide-ConnectingtoaDatabaseServer"></a>Connecting to a Database Server</h3>
+
+<p>Sqoop is designed to import tables from a database into HDFS. As such, it requires a <em>connect string</em> that describes how to connect to the database. The <em>connect string</em> looks like a URL, and is communicated to Sqoop with the <tt>&#45;&#45;connect</tt> argument. This describes the server and database to connect to; it may also specify the port. e.g.: </p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/employees</span>
+</pre>
+</div></div>
+
+<p>This string will connect to a MySQL database named <tt>employees</tt> on the host <tt>database.example.com</tt>. It's important that you <b>do not</b> use the URL <tt>localhost</tt> if you intend to use Sqoop with a distributed Hadoop cluster. The connect string you supply will be used on all the TaskTracker nodes in your MapReduce cluster; if they're told to connect to the literal name <tt>localhost</tt>, they'll each reach a different database (or more likely, no database at all)! Instead, you should use the full DNS or IP address of the database host that can be seen by all your remote nodes.</p>
+
+<p>You may need to authenticate against the database before you can access it. The <tt>&#45;&#45;username</tt> and <tt>&#45;&#45;password</tt> parameters can be used to supply a username and a password to the database. (Note: password access currently requires passing the password on the command-line, which is insecure.) e.g.:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/employees --username aaron --password 12345</span>
+</pre>
+</div></div>
+
+<p>Sqoop automatically supports <span class="nobr"><a href="http://www.mysql.com" rel="nofollow">MySQL</a></span> and <span class="nobr"><a href="http://hsqldb.org/" rel="nofollow">HSQLDB</a></span>. Connect strings beginning with <tt>jdbc:mysql://</tt> and <tt>jdbc:hsqldb:hsql://</tt> automatically inform Sqoop of the correct JDBC driver class to load. HSQLDB's JDBC driver is bundled with Hadoop, and so will work "out of the box." If you install <a href="http://dev.mysql.com/downloads/connector/j/5.1.html">MySQL's Connector/J driver</a> in Hadoop's <tt>lib/</tt> directory, Sqoop will also automatically take advantage of this for any <tt>jdbc:mysql://</tt> connect strings you use. You can use Sqoop with any other JDBC-compliant database as well. First, download the appropriate JDBC driver for the database you want to import from, and install the <tt>.jar</tt> file in the <tt>$HADOOP_HOME/lib</tt> directory on all machines in your Hadoop cluster, or some other directory which is in the classpath on all nodes. Each driver jar also has a specific <em>driver class</em> which defines the entry-point to the driver. For example, MySQL's Connector/J library has a driver class of <tt>com.mysql.jdbc.Driver</tt>. Refer to your database vendor-specific documentation to determine the main driver class. This class must be provided as an argument to Sqoop with <tt>&#45;&#45;driver</tt>.</p>
+
+<p>For example, to connect to a postgres database, first download the driver from <span class="nobr"><a href="http://jdbc.postgresql.org" rel="nofollow">http://jdbc.postgresql.org</a></span> and install it in your Hadoop lib path. Then run Sqoop with something like:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:postgresql:<span class="code-comment">//postgres-server.example.com/employees --driver org.postgresql.Driver</span>
+</pre>
+</div></div>
+
+<p>Note: Sqoop uses the JDBC specification to connect to databases; this should provide a versatile client that interoperates with many different databases. That having been said, we have only thoroughly tested this tool with HSQLDB and MySQL.</p>
+
+<h3><a name="SqoopUsersGuide-ListingAvailableDatabases"></a>Listing Available Databases</h3>
+
+<p>Once connected to a database server, you can list the available databases with the <tt>&#45;&#45;list-databases</tt> parameter. This currently is supported only by HSQLDB and MySQL. Note that in this case, the connect string does not include a database name, just a server address.</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/ --list-databases
+</span>information_schema
+employees
+</pre>
+</div></div>
+
+<p><em>This only works with HSQLDB and MySQL; a vendor-agnostic implementation of this feature has not yet been written.</em></p>
+
+<h3><a name="SqoopUsersGuide-ListingAvailableTables"></a>Listing Available Tables</h3>
+
+<p>Within a database, you can list the tables available for import with the <tt>&#45;&#45;list-tables</tt> command. The following example shows four tables available within the "employees" example database:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/employees --list-tables
+
+</span>employee_names
+payroll_checks
+job_descriptions
+office_supplies
+</pre>
+</div></div>
+
+<h2><a name="SqoopUsersGuide-AutomaticFulldatabaseImport"></a>Automatic Full-database Import</h2>
+
+<p>If you want to import all the tables in a database, you can use the <tt>&#45;&#45;all-tables</tt> command to do so:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/employees --all-tables</span>
+
+</pre>
+</div></div>
+
+<p>This will query the database for the available tables, generate an ORM class for each table, and run a MapReduce job to import each one. Hadoop uses the <span class="nobr"><a href="http://issues.apache.org/jira/browse/HADOOP-2536" rel="nofollow">DBInputFormat</a></span> to read from a database into a Mapper instance. Reading a table into a MapReduce program requires a class to hold the fields of one row of the table. One of the benefits of Sqoop is that it generates this class definition for you, based on the table definition in the database.</p>
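+
+<p>For illustration only, the sketch below shows roughly what a generated record class might look like for the <tt>employee_names</tt> example table; the field types here are assumptions, and the real generated code also contains the JDBC and Writable serialization plumbing:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+// Hypothetical sketch of a Sqoop-generated record class.
+public class employee_names {
+  private Integer employee_id;   // INTEGER column
+  private String first_name;     // VARCHAR column
+
+  public Integer get_employee_id() { return employee_id; }
+  public String get_first_name() { return first_name; }
+}
+</pre>
+</div></div>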
+
+<p>The generated <tt>.java</tt> files are, by default, placed in the current directory. You can supply a different directory with the <tt>&#45;&#45;outdir</tt> parameter. These are then compiled into <tt>.class</tt> and <tt>.jar</tt> files for use by the MapReduce job that Sqoop launches. The compiled files are created in a temporary directory. You can redirect this directory with <tt>&#45;&#45;bindir</tt>.</p>
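+
+<p>For example (the directories shown here are illustrative only):</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/employees --all-tables --outdir /tmp/sqoop/src --bindir /tmp/sqoop/classes</span>
+</pre>
+</div></div>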
+
+<p>Each table will be imported into a separate directory in HDFS, with the same name as the table. For instance, if my Hadoop username is <tt>aaron</tt>, the above command would have generated the following directories in HDFS:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+/user/aaron/employee_names
+/user/aaron/payroll_checks
+/user/aaron/job_descriptions
+/user/aaron/office_supplies
+</pre>
+</div></div>
+
+<p>You can change the base directory under which the tables are loaded with the <tt>&#45;&#45;warehouse-dir</tt> parameter. For example:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/employees --all-tables --warehouse-dir /common/warehouse</span>
+</pre>
+</div></div>
+
+<p>This would create the following directories instead:</p>
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+/common/warehouse/employee_names
+/common/warehouse/payroll_checks
+/common/warehouse/job_descriptions
+/common/warehouse/office_supplies
+</pre>
+</div></div>
+
+<p>By default the data will be read into text files in HDFS. Each of the columns will be represented as comma-delimited text. Each row is terminated by a newline. There is currently no mechanism to quote or escape commas or newlines inside of <tt>CHAR</tt> or <tt>VARCHAR</tt> columns of the database. Applications which depend on comma-delimited parsing of the output files must be careful if commas or newlines may be present in the database. </p>
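+
+<p>For example, rows of the <tt>employee_names</tt> table (with invented values) would appear in the output text files as lines like:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+1,Jane,Doe,104
+2,John,Smith,212
+</pre>
+</div></div>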
+
+<p>If you expect commas or newlines to appear in text columns of the database, or you want to leverage compression and binary file formats, the <tt>&#45;&#45;as-sequencefile</tt> argument to Sqoop will import the table to a set of SequenceFiles instead. As this uses a separate object for each field of each database record, no quoting or escaping of values is necessary. This representation is also likely to be higher performance when used as an input to subsequent MapReduce programs. For completeness, Sqoop provides an <tt>&#45;&#45;as-textfile</tt> option, which is implied by default. An <tt>&#45;&#45;as-textfile</tt> on the command-line will override a previous <tt>&#45;&#45;as-sequencefile</tt> argument.</p>
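+
+<p>For example:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/employees --all-tables --as-sequencefile</span>
+</pre>
+</div></div>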
+
+<p>The SequenceFile format will embed the records from the database as objects using the code generated by Sqoop. It is important that you retain the <tt>.java</tt> file for this class, as you will need to be able to instantiate the same type to read the objects back later, in other user-defined applications.</p>
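+
+<p>As a minimal sketch of reading the imported records back, the following example uses Hadoop's <tt>SequenceFile.Reader</tt>; the file path is an assumption for this example, and the generated record class must be available on the classpath:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
+
+public class ReadImportedRecords {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    // Example path only; use one of the files Sqoop wrote for your table.
+    Path path = new Path("/user/aaron/employee_names/part-00000");
+    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
+    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+    Writable val = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+    while (reader.next(key, val)) {
+      System.out.println(val);   // relies on the record class's toString()
+    }
+    reader.close();
+  }
+}
+</pre>
+</div></div>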
+
+<h2><a name="SqoopUsersGuide-ImportingIndividualTables"></a>Importing Individual Tables</h2>
+
+<p>In addition to full-database imports, Sqoop will allow you to import individual tables. Instead of using <tt>&#45;&#45;all-tables</tt>, specify the name of a particular table with the <tt>&#45;&#45;table</tt> argument:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/employees --table employee_names </span>
+</pre>
+</div></div>
+
+<p>You can further specify a subset of the columns in a table by using the <tt>&#45;&#45;columns</tt> argument. This takes a list of column names, delimited by commas, with no spaces in between. e.g.:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/employees --table employee_names --columns employee_id,first_name,last_name,dept_id</span>
+</pre>
+</div></div>
+
+<p>Sqoop will use a MapReduce job to read sections of the table in parallel. For the MapReduce tasks to divide the table space, the results returned by the database must be orderable. Sqoop will automatically detect the primary key for a table and use that to order the results. If no primary key is available, or (less likely) you want to order the results along a different column, you can specify the column name with <tt>&#45;&#45;order-by</tt>. <b>Important:</b> To guarantee correctness of your input, you must select an ordering column for which each row has a unique value. If duplicate values appear in the ordering column, the results of the import are undefined, and Sqoop will not be able to detect the error.</p>
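+
+<p>For example, to order the import of <tt>employee_names</tt> by a hypothetical <tt>employee_id</tt> column:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/employees --table employee_names --order-by employee_id</span>
+</pre>
+</div></div>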
+
+<p>The <tt>&#45;&#45;columns</tt> and <tt>&#45;&#45;order-by</tt> arguments are incompatible with <tt>&#45;&#45;all-tables</tt>. If you require special handling for some of the tables, then you must manually run a separate import job for each table.</p>
+
+<h2><a name="SqoopUsersGuide-MiscellaneousAdditionalArguments"></a>Miscellaneous Additional Arguments</h2>
+
+<p>If you want to generate the Java classes to represent tables without actually performing an import, supply a connect string and (optionally) credentials as above, as well as <tt>&#45;&#45;all-tables</tt> or <tt>&#45;&#45;table</tt>, but also use the <b><tt>&#45;&#45;generate-only</tt></b> argument. This will generate the classes and cease further operation.</p>
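+
+<p>For example, to generate the class for a single table without importing it:</p>
+
+<div class="code panel" style="border-width: 1px;"><div class="codeContent panelContent">
+<pre class="code-java">
+$ hadoop jar /path/to/sqoop.jar org.apache.hadoop.sqoop.Sqoop --connect jdbc:mysql:<span class="code-comment">//database.example.com/employees --table employee_names --generate-only</span>
+</pre>
+</div></div>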
+
+<p>You can override the <tt>$HADOOP_HOME</tt> environment variable within Sqoop with the <tt>&#45;&#45;hadoop-home</tt> argument. </p>
+
+</body></html>

Added: hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ConnFactory.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ConnFactory.java?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ConnFactory.java (added)
+++ hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ConnFactory.java Tue May 26 10:29:38 2009
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop;
+
+import org.apache.hadoop.sqoop.manager.ConnManager;
+import org.apache.hadoop.sqoop.manager.GenericJdbcManager;
+import org.apache.hadoop.sqoop.manager.HsqldbManager;
+import org.apache.hadoop.sqoop.manager.MySQLManager;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Static factory class to create the ConnManager type required
+ * for the current import job.
+ */
+public final class ConnFactory {
+
+  public static final Log LOG = LogFactory.getLog(ConnFactory.class.getName());
+
+  private ConnFactory() { }
+
+  /**
+   * Factory method to get a ConnManager for the given JDBC connect string
+   * @param opts The parsed command-line options
+   * @return a ConnManager instance for the appropriate database
+   * @throws IOException if it cannot find a ConnManager for this schema
+   */
+  public static ConnManager getManager(ImportOptions opts) throws IOException {
+
+    String manualDriver = opts.getDriverClassName();
+    if (manualDriver != null) {
+      // User has manually specified JDBC implementation with --driver.
+      // Just use GenericJdbcManager.
+      return new GenericJdbcManager(manualDriver, opts);
+    }
+
+    String connectStr = opts.getConnectString();
+
+    int schemeStopIdx = connectStr.indexOf("//");
+    if (-1 == schemeStopIdx) {
+      // no scheme component?
+      throw new IOException("Malformed connect string: " + connectStr);
+    }
+
+    String scheme = connectStr.substring(0, schemeStopIdx);
+
+    if (null == scheme) {
+      // We don't know if this is a mysql://, hsql://, etc.
+      // Can't do anything with this.
+      throw new IOException("Null scheme associated with connect string.");
+    }
+
+    if (scheme.equals("jdbc:mysql:")) {
+      return new MySQLManager(opts);
+    } else if (scheme.equals("jdbc:hsqldb:hsql:")) {
+      return new HsqldbManager(opts);
+    } else {
+      throw new IOException("Unknown connection scheme: " + scheme);
+    }
+  }
+}
+
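
The factory above selects a manager implementation from the connect string's scheme (or from an explicit --driver). As a minimal usage sketch, with an example connect string and table name, and assuming the corresponding JDBC driver is available at runtime:

    import java.io.IOException;
    import org.apache.hadoop.sqoop.ConnFactory;
    import org.apache.hadoop.sqoop.ImportOptions;
    import org.apache.hadoop.sqoop.manager.ConnManager;

    public class ConnFactoryExample {
      public static void main(String[] args) throws IOException {
        // Example values only; this is the test-oriented two-argument constructor.
        ImportOptions opts =
            new ImportOptions("jdbc:mysql://database.example.com/employees", "employee_names");
        // The jdbc:mysql: scheme selects MySQLManager; jdbc:hsqldb:hsql: selects
        // HsqldbManager; an explicit --driver would force GenericJdbcManager.
        ConnManager manager = ConnFactory.getManager(opts);
        System.out.println(manager.getClass().getName());
      }
    }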

Added: hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ImportOptions.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ImportOptions.java?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ImportOptions.java (added)
+++ hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/ImportOptions.java Tue May 26 10:29:38 2009
@@ -0,0 +1,396 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.hadoop.sqoop;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Command-line arguments used by Sqoop
+ */
+public class ImportOptions {
+
+  public static final Log LOG = LogFactory.getLog(ImportOptions.class.getName());
+
+  /**
+   * Thrown when invalid cmdline options are given
+   */
+  @SuppressWarnings("serial")
+  public static class InvalidOptionsException extends Exception {
+
+    private String message;
+
+    public InvalidOptionsException(final String msg) {
+      this.message = msg;
+    }
+
+    public String getMessage() {
+      return message;
+    }
+
+    public String toString() {
+      return getMessage();
+    }
+  }
+
+  // control-flow selector based on command-line switches.
+  public enum ControlAction {
+    ListDatabases,  // list available databases and exit.
+    ListTables,     // list available tables and exit.
+    GenerateOnly,   // generate ORM code but do not import.
+    FullImport,     // generate code (as needed) and import.
+    DebugExec       // just execute a single sql command and print its results.
+  }
+
+  // selects in-HDFS destination file format
+  public enum FileLayout {
+    TextFile,
+    SequenceFile
+  }
+
+
+  // TODO(aaron): Adding something here? Add a getter, a cmdline switch, and a properties file
+  // entry in loadFromProperties(). Add a default value in initDefaults() if you need one.
+  // Make sure you add the stub to the testdata/sqoop.properties.template file.
+  private String connectString;
+  private String tableName;
+  private String [] columns;
+  private boolean allTables;
+  private String username;
+  private String password;
+  private String codeOutputDir;
+  private String jarOutputDir;
+  private ControlAction action;
+  private String hadoopHome;
+  private String orderByCol;
+  private String debugSqlCmd;
+  private String driverClassName;
+  private String warehouseDir;
+  private FileLayout layout;
+
+  private static final String DEFAULT_CONFIG_FILE = "sqoop.properties";
+
+  public ImportOptions() {
+    initDefaults();
+  }
+
+  /**
+   * Alternate ImportOptions constructor used mostly for unit testing
+   * @param connect JDBC connect string to use
+   * @param table Table to read
+   */
+  public ImportOptions(final String connect, final String table) {
+    initDefaults();
+
+    this.connectString = connect;
+    this.tableName = table;
+  }
+
+  private void loadFromProperties() {
+    File configFile = new File(DEFAULT_CONFIG_FILE);
+    if (!configFile.canRead()) {
+      return; //can't do this.
+    }
+
+    Properties props = new Properties();
+    InputStream istream = null;
+    try {
+      LOG.info("Loading properties from " + configFile.getAbsolutePath());
+      istream = new FileInputStream(configFile);
+      props.load(istream);
+
+      this.hadoopHome = props.getProperty("hadoop.home", this.hadoopHome);
+      this.codeOutputDir = props.getProperty("out.dir", this.codeOutputDir);
+      this.jarOutputDir = props.getProperty("bin.dir", this.jarOutputDir);
+      this.username = props.getProperty("db.username", this.username);
+      this.password = props.getProperty("db.password", this.password);
+      this.tableName = props.getProperty("db.table", this.tableName);
+      this.connectString = props.getProperty("db.connect.url", this.connectString);
+      this.orderByCol = props.getProperty("db.sort.column", this.orderByCol);
+      this.driverClassName = props.getProperty("jdbc.driver", this.driverClassName);
+      this.warehouseDir = props.getProperty("hdfs.warehouse.dir", this.warehouseDir);
+
+    } catch (IOException ioe) {
+      LOG.error("Could not read properties file " + DEFAULT_CONFIG_FILE + ": " + ioe.toString());
+    } finally {
+      if (null != istream) {
+        try {
+          istream.close();
+        } catch (IOException ioe) {
+          // ignore this; we're closing.
+        }
+      }
+    }
+  }
+
+  private void initDefaults() {
+    // first, set the true defaults if nothing else happens.
+    // default action is to run the full pipeline.
+    this.action = ControlAction.FullImport;
+    this.hadoopHome = System.getenv("HADOOP_HOME");
+    this.codeOutputDir = System.getProperty("sqoop.src.dir", ".");
+
+    String tmpDir = System.getProperty("test.build.data", "/tmp/");
+    if (!tmpDir.endsWith(File.separator)) {
+      tmpDir = tmpDir + File.separator;
+    }
+
+    this.jarOutputDir = tmpDir + "sqoop/compile";
+    this.layout = FileLayout.TextFile;
+
+    loadFromProperties();
+  }
+
+  /**
+   * Print usage strings for the program's arguments.
+   */
+  public static void printUsage() {
+    System.out.println("Usage: hadoop sqoop.jar org.apache.hadoop.sqoop.Sqoop (options)");
+    System.out.println("");
+    System.out.println("Database connection options:");
+    System.out.println("--connect (jdbc-uri)         Specify JDBC connect string");
+    System.out.println("--driver (class-name)        Manually specify JDBC driver class to use");
+    System.out.println("--username (username)        Set authentication username");
+    System.out.println("--password (password)        Set authentication password");
+    System.out.println("");
+    System.out.println("Import control options:");
+    System.out.println("--table (tablename)          Table to read");
+    System.out.println("--columns (col,col,col...)   Columns to export from table");
+    System.out.println("--order-by (column-name)     Column of the table used to order results");
+    System.out.println("--hadoop-home (dir)          Override $HADOOP_HOME");
+    System.out.println("--warehouse-dir (dir)        HDFS path for table destination");
+    System.out.println("--as-sequencefile            Imports data to SequenceFiles");
+    System.out.println("--as-textfile                Imports data as plain text (default)");
+    System.out.println("--all-tables                 Import all tables in database");
+    System.out.println("                             (Ignores --table, --columns and --order-by)");
+    System.out.println("");
+    System.out.println("Code generation options:");
+    System.out.println("--outdir (dir)               Output directory for generated code");
+    System.out.println("--bindir (dir)               Output directory for compiled objects");
+    System.out.println("--generate-only              Stop after code generation; do not import");
+    System.out.println("");
+    System.out.println("Additional commands:");
+    System.out.println("--list-tables                List tables in database and exit");
+    System.out.println("--list-databases             List all databases available and exit");
+    System.out.println("--debug-sql (statement)      Execute 'statement' in SQL and exit");
+    System.out.println("");
+    System.out.println("Generic Hadoop command-line options:");
+    ToolRunner.printGenericCommandUsage(System.out);
+    System.out.println("");
+    System.out.println("At minimum, you must specify --connect "
+        + "and either --table or --all-tables.");
+    System.out.println("Alternatively, you can specify --generate-only or one of the additional");
+    System.out.println("commands.");
+  }
+
+  /**
+   * Read args from the command-line into member fields.
+   * @throws InvalidOptionsException if there's a problem parsing arguments.
+   */
+  public void parse(String [] args) throws InvalidOptionsException {
+    int i = 0;
+    try {
+      for (i = 0; i < args.length; i++) {
+        if (args[i].equals("--connect")) {
+          this.connectString = args[++i];
+        } else if (args[i].equals("--driver")) {
+          this.driverClassName = args[++i];
+        } else if (args[i].equals("--table")) {
+          this.tableName = args[++i];
+        } else if (args[i].equals("--columns")) {
+          String columnString = args[++i];
+          this.columns = columnString.split(",");
+        } else if (args[i].equals("--order-by")) {
+          this.orderByCol = args[++i];
+        } else if (args[i].equals("--list-tables")) {
+          this.action = ControlAction.ListTables;
+        } else if (args[i].equals("--all-tables")) {
+          this.allTables = true;
+        } else if (args[i].equals("--username")) {
+          this.username = args[++i];
+          if (null == this.password) {
+            // Set password to empty if the username is set first,
+            // to ensure that they're either both null or neither.
+            this.password = "";
+          }
+        } else if (args[i].equals("--password")) {
+          this.password = args[++i];
+        } else if (args[i].equals("--hadoop-home")) {
+          this.hadoopHome = args[++i];
+        } else if (args[i].equals("--outdir")) {
+          this.codeOutputDir = args[++i];
+        } else if (args[i].equals("--as-sequencefile")) {
+          this.layout = FileLayout.SequenceFile;
+        } else if (args[i].equals("--as-textfile")) {
+          this.layout = FileLayout.TextFile;
+        } else if (args[i].equals("--bindir")) {
+          this.jarOutputDir = args[++i];
+        } else if (args[i].equals("--warehouse-dir")) {
+          this.warehouseDir = args[++i];
+        } else if (args[i].equals("--list-databases")) {
+          this.action = ControlAction.ListDatabases;
+        } else if (args[i].equals("--generate-only")) {
+          this.action = ControlAction.GenerateOnly;
+        } else if (args[i].equals("--debug-sql")) {
+          this.action = ControlAction.DebugExec;
+          // read the entire remainder of the commandline into the debug sql statement.
+          if (null == this.debugSqlCmd) {
+            this.debugSqlCmd = "";
+          }
+          for (i++; i < args.length; i++) {
+            this.debugSqlCmd = this.debugSqlCmd + args[i] + " ";
+          }
+        } else if (args[i].equals("--help")) {
+          printUsage();
+          throw new InvalidOptionsException("");
+        } else {
+          throw new InvalidOptionsException("Invalid argument: " + args[i] + ".\n"
+              + "Try --help for usage.");
+        }
+      }
+    } catch (ArrayIndexOutOfBoundsException oob) {
+      throw new InvalidOptionsException("Error: " + args[--i] + " expected argument.\n"
+          + "Try --help for usage.");
+    }
+  }
+
+  /**
+   * Validates options and ensures that any required options are
+   * present and that any mutually-exclusive options are not selected.
+   * @throws InvalidOptionsException if there's a problem.
+   */
+  public void validate() throws InvalidOptionsException {
+    if (this.allTables && this.columns != null) {
+      // If we're reading all tables in a database, can't filter column names.
+      throw new InvalidOptionsException("--columns and --all-tables are incompatible options."
+          + "\nTry --help for usage instructions.");
+    } else if (this.allTables && this.orderByCol != null) {
+      // If we're reading all tables in a database, can't set pkey
+      throw new InvalidOptionsException("--order-by and --all-tables are incompatible options."
+          + "\nTry --help for usage instructions.");
+    } else if (this.connectString == null) {
+      throw new InvalidOptionsException("Error: Required argument --connect is missing."
+          + "\nTry --help for usage instructions.");
+    }
+  }
+
+  public String getConnectString() {
+    return connectString;
+  }
+
+  public String getTableName() {
+    return tableName;
+  }
+
+  public String[] getColumns() {
+    if (null == columns) {
+      return null;
+    } else {
+      return Arrays.copyOf(columns, columns.length);
+    }
+  }
+
+  public String getOrderByCol() {
+    return orderByCol;
+  }
+
+  public ControlAction getAction() {
+    return action;
+  }
+
+  public boolean isAllTables() {
+    return allTables;
+  }
+
+  public String getUsername() {
+    return username;
+  }
+
+  public String getPassword() {
+    return password;
+  }
+
+  /**
+   * @return location where .java files go; guaranteed to end with '/'
+   */
+  public String getCodeOutputDir() {
+    if (codeOutputDir.endsWith(File.separator)) {
+      return codeOutputDir;
+    } else {
+      return codeOutputDir + File.separator;
+    }
+  }
+
+  /**
+   * @return location where .jar and .class files go; guaranteed to end with '/'
+   */
+  public String getJarOutputDir() {
+    if (jarOutputDir.endsWith(File.separator)) {
+      return jarOutputDir;
+    } else {
+      return jarOutputDir + File.separator;
+    }
+  }
+
+  /**
+   * Return the value of $HADOOP_HOME
+   * @return $HADOOP_HOME, or null if it's not set.
+   */
+  public String getHadoopHome() {
+    return hadoopHome;
+  }
+
+  /**
+   * @return a sql command to execute and exit with.
+   */
+  public String getDebugSqlCmd() {
+    return debugSqlCmd;
+  }
+
+  /**
+   * @return The JDBC driver class name specified with --driver
+   */
+  public String getDriverClassName() {
+    return driverClassName;
+  }
+
+  /**
+   * @return the base destination path for table uploads.
+   */
+  public String getWarehouseDir() {
+    return warehouseDir;
+  }
+
+  /**
+   * @return the destination file format
+   */
+  public FileLayout getFileLayout() {
+    return this.layout;
+  }
+}
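
As shown in loadFromProperties() above, a sqoop.properties file in the current working directory can pre-set many of these options. The keys below are the ones that method reads; the values are invented for this example:

    # sqoop.properties (example values only)
    db.connect.url=jdbc:mysql://database.example.com/employees
    db.username=aaron
    db.table=employee_names
    db.sort.column=employee_id
    jdbc.driver=com.mysql.jdbc.Driver
    out.dir=/tmp/sqoop/src
    bin.dir=/tmp/sqoop/classes
    hdfs.warehouse.dir=/common/warehouse
    hadoop.home=/usr/local/hadoop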

Added: hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/Sqoop.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/Sqoop.java?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/Sqoop.java (added)
+++ hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/Sqoop.java Tue May 26 10:29:38 2009
@@ -0,0 +1,175 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.sqoop.manager.ConnManager;
+import org.apache.hadoop.sqoop.orm.ClassWriter;
+import org.apache.hadoop.sqoop.orm.CompilationManager;
+import org.apache.hadoop.sqoop.util.ImportError;
+
+/**
+ * Main entry-point for Sqoop
+ * Usage: hadoop jar (this_jar_name) org.apache.hadoop.sqoop.Sqoop (options)
+ * See the ImportOptions class for options.
+ */
+public class Sqoop extends Configured implements Tool {
+
+  public static final Log LOG = LogFactory.getLog(Sqoop.class.getName());
+
+  private ImportOptions options;
+  private ConnManager manager;
+
+  public Sqoop() {
+  }
+
+  public ImportOptions getOptions() {
+    return options;
+  }
+
+  /**
+   * Generate the .class and .jar files
+   * @return the filename of the emitted jar file.
+   * @throws IOException
+   */
+  private String generateORM(String tableName) throws IOException {
+    LOG.info("Beginning code generation");
+    CompilationManager compileMgr = new CompilationManager(options);
+    ClassWriter classWriter = new ClassWriter(options, manager, tableName, compileMgr);
+    classWriter.generate();
+    compileMgr.compile();
+    compileMgr.jar();
+    return compileMgr.getJarFilename();
+  }
+
+  private void importTable(String tableName) throws IOException, ImportError {
+    String jarFile = null;
+
+    // Generate the ORM code for the tables.
+    // TODO(aaron): Allow this to be bypassed if the user has already generated code
+    jarFile = generateORM(tableName);
+
+    if (options.getAction() == ImportOptions.ControlAction.FullImport) {
+      // Proceed onward to do the import.
+      manager.importTable(tableName, jarFile, getConf());
+    }
+  }
+
+
+  /**
+   * Actual main entry-point for the program
+   */
+  public int run(String [] args) {
+    options = new ImportOptions();
+    try {
+      options.parse(args);
+      options.validate();
+    } catch (ImportOptions.InvalidOptionsException e) {
+      // display the error msg
+      System.err.println(e.getMessage());
+      return 1; // exit on exception here
+    }
+
+    // Get the connection to the database
+    try {
+      manager = ConnFactory.getManager(options);
+    } catch (Exception e) {
+      LOG.error("Got error creating database manager: " + e.toString());
+      return 1;
+    }
+
+    ImportOptions.ControlAction action = options.getAction();
+    if (action == ImportOptions.ControlAction.ListTables) {
+      String [] tables = manager.listTables();
+      if (null == tables) {
+        System.err.println("Could not retrieve tables list from server");
+        LOG.error("manager.listTables() returned null");
+        return 1;
+      } else {
+        for (String tbl : tables) {
+          System.out.println(tbl);
+        }
+      }
+    } else if (action == ImportOptions.ControlAction.ListDatabases) {
+      String [] databases = manager.listDatabases();
+      if (null == databases) {
+        System.err.println("Could not retrieve database list from server");
+        LOG.error("manager.listDatabases() returned null");
+        return 1;
+      } else {
+        for (String db : databases) {
+          System.out.println(db);
+        }
+      }
+    } else if (action == ImportOptions.ControlAction.DebugExec) {
+      // just run a SQL statement for debugging purposes.
+      manager.execAndPrint(options.getDebugSqlCmd());
+      return 0;
+    } else {
+      // This is either FullImport or GenerateOnly.
+
+      try {
+        if (options.isAllTables()) {
+          String [] tables = manager.listTables();
+          if (null == tables) {
+            System.err.println("Could not retrieve tables list from server");
+            LOG.error("manager.listTables() returned null");
+            return 1;
+          } else {
+            for (String tableName : tables) {
+              importTable(tableName);
+            }
+          }
+        } else {
+          // just import a single table the user specified.
+          importTable(options.getTableName());
+        }
+      } catch (IOException ioe) {
+        LOG.error("Encountered IOException running import job: " + ioe.toString());
+        return 1;
+      } catch (ImportError ie) {
+        LOG.error("Error during import: " + ie.toString());
+        return 1;
+      }
+    }
+
+    return 0;
+  }
+
+  public static void main(String [] args) {
+    int ret;
+    try {
+      Sqoop importer = new Sqoop();
+      ret = ToolRunner.run(importer, args);
+    } catch (Exception e) {
+      LOG.error("Got exception running Sqoop: " + e.toString());
+      e.printStackTrace();
+      ret = 1;
+    }
+
+    System.exit(ret);
+  }
+}

Added: hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/BigDecimalSerializer.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/BigDecimalSerializer.java?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/BigDecimalSerializer.java (added)
+++ hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/BigDecimalSerializer.java Tue May 26 10:29:38 2009
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.lib;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.hadoop.io.Text;
+
+/**
+ * Serialize BigDecimal classes to/from DataInput and DataOutput objects.
+ *
+ * A BigDecimal is composed of a BigInteger unscaled value and an integer 'scale' field.
+ * The BigDecimal/BigInteger can also return itself as a 'long' value.
+ *
+ * We serialize in one of two formats:
+ *
+ *  First, check whether the BigInt can fit in a long:
+ *  boolean b = BigIntegerPart > LONG_MAX || BigIntegerPart < LONG_MIN
+ *
+ *  [int: scale][boolean: b == false][long: BigInt-part]
+ *  [int: scale][boolean: b == true][string: BigInt-part.toString()]
+ *
+ *
+ * 
+ *
+ * TODO(aaron): Get this to work with Hadoop's Serializations framework.
+ */
+public final class BigDecimalSerializer {
+
+  private BigDecimalSerializer() { }
+
+  static final BigInteger LONG_MAX_AS_BIGINT = BigInteger.valueOf(Long.MAX_VALUE);
+  static final BigInteger LONG_MIN_AS_BIGINT = BigInteger.valueOf(Long.MIN_VALUE);
+
+  public static void write(BigDecimal d, DataOutput out) throws IOException {
+    int scale = d.scale();
+    BigInteger bigIntPart = d.unscaledValue();
+    boolean fastpath = bigIntPart.compareTo(LONG_MAX_AS_BIGINT) < 0
+        && bigIntPart.compareTo(LONG_MIN_AS_BIGINT) > 0;
+
+    out.writeInt(scale);
+    out.writeBoolean(fastpath);
+    if (fastpath) {
+      out.writeLong(bigIntPart.longValue());
+    } else {
+      Text.writeString(out, bigIntPart.toString());
+    }
+  }
+
+  public static BigDecimal readFields(DataInput in) throws IOException {
+    int scale = in.readInt();
+    boolean fastpath = in.readBoolean();
+    BigInteger unscaledIntPart;
+    if (fastpath) {
+      long unscaledValue = in.readLong();
+      unscaledIntPart = BigInteger.valueOf(unscaledValue);
+    } else {
+      String unscaledValueStr = Text.readString(in);
+      unscaledIntPart = new BigInteger(unscaledValueStr);
+    }
+
+    return new BigDecimal(unscaledIntPart, scale);
+  }
+}

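For reference, a minimal round-trip sketch of the two encodings described in the class
comment above. The BigDecimalRoundTrip class and its sample values are illustrative only;
BigDecimalSerializer.write() and readFields() are the methods added in this patch.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.math.BigDecimal;

    import org.apache.hadoop.sqoop.lib.BigDecimalSerializer;

    public class BigDecimalRoundTrip {
      public static void main(String[] args) throws IOException {
        // Unscaled value fits in a long, so the compact [scale][true][long] form is used.
        BigDecimal small = new BigDecimal("12345.6789");
        // Unscaled value exceeds Long.MAX_VALUE, so the [scale][false][string] form is used.
        BigDecimal large = new BigDecimal("987654321098765432109876543210.5");

        for (BigDecimal original : new BigDecimal[] { small, large }) {
          ByteArrayOutputStream buf = new ByteArrayOutputStream();
          BigDecimalSerializer.write(original, new DataOutputStream(buf));
          BigDecimal copy = BigDecimalSerializer.readFields(
              new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
          System.out.println(original + " == " + copy + " ? " + original.equals(copy));
        }
      }
    }
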
Added: hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/JdbcWritableBridge.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/JdbcWritableBridge.java?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/JdbcWritableBridge.java (added)
+++ hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/JdbcWritableBridge.java Tue May 26 10:29:38 2009
@@ -0,0 +1,203 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.lib;
+
+import java.math.BigDecimal;
+import java.sql.Date;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Time;
+import java.sql.Timestamp;
+
+/**
+ * Contains a set of methods which can read db columns from a ResultSet into
+ * Java types, and write those Java types back into a PreparedStatement. SQL
+ * NULL values are mapped to (and from) Java null for all supported types.
+ */
+public final class JdbcWritableBridge {
+
+  private JdbcWritableBridge() {
+  }
+
+  public static Integer readInteger(int colNum, ResultSet r) throws SQLException {
+    int val;
+    val = r.getInt(colNum);
+    if (r.wasNull()) {
+      return null;
+    } else {
+      return Integer.valueOf(val);
+    }
+  }
+
+  public static Long readLong(int colNum, ResultSet r) throws SQLException {
+    long val;
+    val = r.getLong(colNum);
+    if (r.wasNull()) {
+      return null;
+    } else {
+      return Long.valueOf(val);
+    }
+  }
+
+  public static String readString(int colNum, ResultSet r) throws SQLException {
+    return r.getString(colNum);
+  }
+
+  public static Float readFloat(int colNum, ResultSet r) throws SQLException {
+    float val;
+    val = r.getFloat(colNum);
+    if (r.wasNull()) {
+      return null;
+    } else {
+      return Float.valueOf(val);
+    }
+  }
+
+  public static Double readDouble(int colNum, ResultSet r) throws SQLException {
+    double val;
+    val = r.getDouble(colNum);
+    if (r.wasNull()) {
+      return null;
+    } else {
+      return Double.valueOf(val);
+    }
+  }
+
+  public static Boolean readBoolean(int colNum, ResultSet r) throws SQLException {
+    boolean val;
+    val = r.getBoolean(colNum);
+    if (r.wasNull()) {
+      return null;
+    } else {
+      return Boolean.valueOf(val);
+    }
+  }
+
+  public static Time readTime(int colNum, ResultSet r) throws SQLException {
+    return r.getTime(colNum);
+  }
+
+  public static Timestamp readTimestamp(int colNum, ResultSet r) throws SQLException {
+    return r.getTimestamp(colNum);
+  }
+
+  public static Date readDate(int colNum, ResultSet r) throws SQLException {
+    return r.getDate(colNum);
+  }
+
+  public static BigDecimal readBigDecimal(int colNum, ResultSet r) throws SQLException {
+    return r.getBigDecimal(colNum);
+  }
+
+  public static void writeInteger(Integer val, int paramIdx, int sqlType, PreparedStatement s)
+      throws SQLException {
+    if (null == val) {
+      s.setNull(paramIdx, sqlType);
+    } else {
+      s.setInt(paramIdx, val);
+    }
+  }
+
+  public static void writeLong(Long val, int paramIdx, int sqlType, PreparedStatement s)
+      throws SQLException {
+    if (null == val) {
+      s.setNull(paramIdx, sqlType);
+    } else {
+      s.setLong(paramIdx, val);
+    }
+  }
+
+  public static void writeDouble(Double val, int paramIdx, int sqlType, PreparedStatement s)
+      throws SQLException {
+    if (null == val) {
+      s.setNull(paramIdx, sqlType);
+    } else {
+      s.setDouble(paramIdx, val);
+    }
+  }
+
+  public static void writeBoolean(Boolean val, int paramIdx, int sqlType, PreparedStatement s)
+      throws SQLException {
+    if (null == val) {
+      s.setNull(paramIdx, sqlType);
+    } else {
+      s.setBoolean(paramIdx, val);
+    }
+  }
+
+  public static void writeFloat(Float val, int paramIdx, int sqlType, PreparedStatement s)
+      throws SQLException {
+    if (null == val) {
+      s.setNull(paramIdx, sqlType);
+    } else {
+      s.setFloat(paramIdx, val);
+    }
+  }
+
+  public static void writeString(String val, int paramIdx, int sqlType, PreparedStatement s)
+      throws SQLException {
+    if (null == val) {
+      s.setNull(paramIdx, sqlType);
+    } else {
+      s.setString(paramIdx, val);
+    }
+  }
+
+  public static void writeTimestamp(Timestamp val, int paramIdx, int sqlType, PreparedStatement s)
+      throws SQLException {
+    if (null == val) {
+      s.setNull(paramIdx, sqlType);
+    } else {
+      s.setTimestamp(paramIdx, val);
+    }
+  }
+
+  public static void writeTime(Time val, int paramIdx, int sqlType, PreparedStatement s)
+      throws SQLException {
+    if (null == val) {
+      s.setNull(paramIdx, sqlType);
+    } else {
+      s.setTime(paramIdx, val);
+    }
+  }
+
+  public static void writeDate(Date val, int paramIdx, int sqlType, PreparedStatement s)
+      throws SQLException {
+    if (null == val) {
+      s.setNull(paramIdx, sqlType);
+    } else {
+      s.setDate(paramIdx, val);
+    }
+  }
+
+  public static void writeBigDecimal(BigDecimal val, int paramIdx, int sqlType, PreparedStatement s)
+      throws SQLException {
+    if (null == val) {
+      s.setNull(paramIdx, sqlType);
+    } else {
+      s.setBigDecimal(paramIdx, val);
+    }
+  }
+
+}

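The bridge methods above are meant to be invoked from Sqoop's generated record classes;
the hand-written sketch below illustrates that calling pattern. The EmployeeRecord class
and its two-column layout are hypothetical and not part of this patch.

    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Types;

    import org.apache.hadoop.sqoop.lib.JdbcWritableBridge;

    public class EmployeeRecord {
      private Integer id;     // nullable INTEGER column
      private String name;    // VARCHAR column

      // Populate fields from the current row of a ResultSet; SQL NULL maps to Java null.
      public void readFields(ResultSet results) throws SQLException {
        this.id = JdbcWritableBridge.readInteger(1, results);
        this.name = JdbcWritableBridge.readString(2, results);
      }

      // Bind fields to a PreparedStatement; Java null maps back to SQL NULL.
      public void write(PreparedStatement stmt) throws SQLException {
        JdbcWritableBridge.writeInteger(this.id, 1, Types.INTEGER, stmt);
        JdbcWritableBridge.writeString(this.name, 2, Types.VARCHAR, stmt);
      }
    }
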
Added: hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/ConnManager.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/ConnManager.java?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/ConnManager.java (added)
+++ hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/ConnManager.java Tue May 26 10:29:38 2009
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.manager;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+
+import org.apache.hadoop.sqoop.util.ImportError;
+
+/**
+ * Abstract interface that manages connections to a database.
+ * Implementations of this interface drive the actual interaction with the
+ * database: listing tables, describing their formats, and running imports.
+ */
+public interface ConnManager {
+
+  /**
+   * Return a list of all databases on a server
+   */
+  String [] listDatabases();
+
+  /**
+   * Return a list of all tables in a database
+   */
+  String [] listTables();
+
+  /**
+   * Return a list of column names in a table in the order returned by the db.
+   */
+  String [] getColumnNames(String tableName);
+
+  /**
+   * Return the name of the primary key for a table, or null if there is none.
+   */
+  String getPrimaryKey(String tableName);
+
+  /**
+   * Return an unordered mapping from colname to sqltype for
+   * all columns in a table.
+   *
+   * The Integer type id is a constant from java.sql.Types
+   */
+  Map<String, Integer> getColumnTypes(String tableName);
+
+  /**
+   * Execute a SQL statement to read the named set of columns from a table.
+   * If columns is null, all columns from the table are read. This is a local
+   * (non-parallelized) read of the table back to the current client.
+   */
+  ResultSet readTable(String tableName, String [] columns) throws SQLException;
+
+  /**
+   * @return the actual database connection
+   */
+  Connection getConnection() throws SQLException;
+
+  /**
+   * Resolve a database-specific type to the Java type that should contain it.
+   * @param sqlType
+   * @return the name of a Java type to hold the sql datatype, or null if none.
+   */
+  String toJavaType(int sqlType);
+
+  /**
+   * @return a string identifying the driver class to load for this JDBC connection type.
+   */
+  String getDriverClass();
+
+  /**
+   * Execute a SQL statement 's' and print its results to stdout
+   */
+  void execAndPrint(String s);
+
+  /**
+   * Perform an import of a table from the database into HDFS
+   */
+  void importTable(String tableName, String jarFile, Configuration conf)
+      throws IOException, ImportError;
+
+  /**
+   * Perform any shutdown operations on the connection.
+   */
+  void close() throws SQLException;
+}
+

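Any code holding a ConnManager (for instance one of the concrete managers added below)
can combine these methods to inspect a schema. A minimal sketch, assuming only the
interface methods declared above; the SchemaDumper class itself is hypothetical.

    import java.util.Map;

    import org.apache.hadoop.sqoop.manager.ConnManager;

    public class SchemaDumper {
      // Print every table known to the manager with its column -> Java type mapping.
      public static void dump(ConnManager manager) {
        String[] tables = manager.listTables();
        if (null == tables) {
          System.err.println("Could not retrieve table list");
          return;
        }
        for (String table : tables) {
          System.out.println(table + " (primary key: " + manager.getPrimaryKey(table) + ")");
          Map<String, Integer> columnTypes = manager.getColumnTypes(table);
          if (null == columnTypes) {
            continue;
          }
          for (Map.Entry<String, Integer> col : columnTypes.entrySet()) {
            System.out.println("  " + col.getKey() + " -> " + manager.toJavaType(col.getValue()));
          }
        }
      }
    }
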
Added: hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/GenericJdbcManager.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/GenericJdbcManager.java?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/GenericJdbcManager.java (added)
+++ hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/GenericJdbcManager.java Tue May 26 10:29:38 2009
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.manager;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.sqoop.ImportOptions;
+
+/**
+ * Database manager that connects to a generic JDBC-compliant
+ * database; its constructor is parameterized on the JDBC driver
+ * class to load.
+ */
+public class GenericJdbcManager extends SqlManager {
+
+  public static final Log LOG = LogFactory.getLog(GenericJdbcManager.class.getName());
+
+  private String jdbcDriverClass;
+  private Connection connection;
+
+  public GenericJdbcManager(final String driverClass, final ImportOptions opts) {
+    super(opts);
+
+    this.jdbcDriverClass = driverClass;
+  }
+
+  @Override
+  public Connection getConnection() throws SQLException {
+    if (null == this.connection) {
+      this.connection = makeConnection();
+    }
+
+    return this.connection;
+  }
+
+  public void close() throws SQLException {
+    super.close();
+    if (null != this.connection) {
+      this.connection.close();
+    }
+  }
+
+  public String getDriverClass() {
+    return jdbcDriverClass;
+  }
+}
+

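Because GenericJdbcManager's constructor is parameterized on the JDBC driver class,
supporting another database largely amounts to supplying that class name, as the
HsqldbManager and MySQLManager below do. A hypothetical PostgreSQL manager, not part of
this patch, might look like:

    package org.apache.hadoop.sqoop.manager;

    import org.apache.hadoop.sqoop.ImportOptions;

    /**
     * Hypothetical manager for PostgreSQL, shown only to illustrate how
     * GenericJdbcManager is parameterized on a JDBC driver class.
     */
    public class PostgresqlManager extends GenericJdbcManager {

      // Driver class to ensure is loaded when making the db connection.
      private static final String DRIVER_CLASS = "org.postgresql.Driver";

      public PostgresqlManager(final ImportOptions opts) {
        super(DRIVER_CLASS, opts);
      }
    }
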
Added: hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/HsqldbManager.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/HsqldbManager.java?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/HsqldbManager.java (added)
+++ hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/HsqldbManager.java Tue May 26 10:29:38 2009
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.manager;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.sqoop.ImportOptions;
+
+/**
+ * Manages connections to HSQLDB databases.
+ * Extends the generic JDBC manager.
+ */
+public class HsqldbManager extends GenericJdbcManager implements ConnManager {
+
+  public static final Log LOG = LogFactory.getLog(HsqldbManager.class.getName());
+
+  // driver class to ensure is loaded when making db connection.
+  private static final String DRIVER_CLASS = "org.hsqldb.jdbcDriver";
+
+  // HSQLDB doesn't have a notion of multiple "databases"; the user's database is always called
+  // "PUBLIC".
+  private static final String HSQL_SCHEMA_NAME = "PUBLIC";
+
+  public HsqldbManager(final ImportOptions opts) {
+    super(DRIVER_CLASS, opts);
+  }
+
+  /**
+   * Note: HSQLDB only supports a single schema named "PUBLIC".
+   */
+  @Override
+  public String[] listDatabases() {
+    String [] databases = {HSQL_SCHEMA_NAME};
+    return databases;
+  }
+}

Added: hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/MySQLManager.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/MySQLManager.java?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/MySQLManager.java (added)
+++ hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/MySQLManager.java Tue May 26 10:29:38 2009
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.manager;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.sqoop.ImportOptions;
+
+/**
+ * Manages connections to MySQL databases.
+ */
+public class MySQLManager extends GenericJdbcManager {
+
+  public static final Log LOG = LogFactory.getLog(MySQLManager.class.getName());
+
+  // driver class to ensure is loaded when making db connection.
+  private static final String DRIVER_CLASS = "com.mysql.jdbc.Driver";
+
+  public MySQLManager(final ImportOptions opts) {
+    super(DRIVER_CLASS, opts);
+  }
+
+  @Override
+  public String[] listDatabases() {
+    // TODO(aaron): Add an automated unit test for this.
+
+    ResultSet results = execute("SHOW DATABASES");
+    if (null == results) {
+      return null;
+    }
+
+    try {
+      ArrayList<String> databases = new ArrayList<String>();
+      while (results.next()) {
+        String dbName = results.getString(1);
+        databases.add(dbName);
+      }
+
+      return databases.toArray(new String[0]);
+    } catch (SQLException sqlException) {
+      LOG.error("Error reading from database: " + sqlException.toString());
+      return null;
+    }
+  }
+}

Added: hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/SqlManager.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/SqlManager.java?rev=778646&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/SqlManager.java (added)
+++ hadoop/core/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/SqlManager.java Tue May 26 10:29:38 2009
@@ -0,0 +1,371 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.manager;
+
+import org.apache.hadoop.sqoop.ImportOptions;
+import org.apache.hadoop.sqoop.mapred.ImportJob;
+import org.apache.hadoop.sqoop.util.ImportError;
+import org.apache.hadoop.sqoop.util.ResultSetPrinter;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DatabaseMetaData;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Types;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * ConnManager implementation for a generic SQL-compliant database.
+ * This is an abstract class; it requires a database-specific
+ * subclass to actually create the connection.
+ */
+public abstract class SqlManager implements ConnManager {
+
+  public static final Log LOG = LogFactory.getLog(SqlManager.class.getName());
+
+  protected ImportOptions options;
+
+  /**
+   * Constructs the SqlManager.
+   * @param opts the Sqoop ImportOptions describing the requested import
+   */
+  public SqlManager(final ImportOptions opts) {
+    this.options = opts;
+  }
+
+  @Override
+  public String[] getColumnNames(String tableName) {
+    String stmt = "SELECT t.* FROM " + tableName + " AS t WHERE 1 = 1";
+
+    ResultSet results = execute(stmt);
+    if (null == results) {
+      return null;
+    }
+
+    try {
+      int cols = results.getMetaData().getColumnCount();
+      ArrayList<String> columns = new ArrayList<String>();
+      ResultSetMetaData metadata = results.getMetaData();
+      for (int i = 1; i < cols + 1; i++) {
+        String colName = metadata.getColumnName(i);
+        if (colName == null || colName.equals("")) {
+          colName = metadata.getColumnLabel(i);
+        }
+        columns.add(colName);
+      }
+      return columns.toArray(new String[0]);
+    } catch (SQLException sqlException) {
+      LOG.error("Error reading from database: " + sqlException.toString());
+      return null;
+    }
+  }
+
+  @Override
+  public Map<String, Integer> getColumnTypes(String tableName) {
+    String stmt = "SELECT t.* FROM " + tableName + " AS t WHERE 1 = 1";
+
+    ResultSet results = execute(stmt);
+    if (null == results) {
+      return null;
+    }
+
+    try {
+      Map<String, Integer> colTypes = new HashMap<String, Integer>();
+
+      int cols = results.getMetaData().getColumnCount();
+      ResultSetMetaData metadata = results.getMetaData();
+      for (int i = 1; i < cols + 1; i++) {
+        int typeId = metadata.getColumnType(i);
+        String colName = metadata.getColumnName(i);
+        if (colName == null || colName.equals("")) {
+          colName = metadata.getColumnLabel(i);
+        }
+
+        colTypes.put(colName, Integer.valueOf(typeId));
+      }
+
+      return colTypes;
+    } catch (SQLException sqlException) {
+      LOG.error("Error reading from database: " + sqlException.toString());
+      return null;
+    }
+  }
+
+  @Override
+  public ResultSet readTable(String tableName, String[] columns) throws SQLException {
+    if (columns == null) {
+      columns = getColumnNames(tableName);
+    }
+
+    StringBuilder sb = new StringBuilder();
+    sb.append("SELECT ");
+    boolean first = true;
+    for (String col : columns) {
+      if (!first) {
+        sb.append(", ");
+      }
+      sb.append(col);
+      first = false;
+    }
+    sb.append(" FROM ");
+    sb.append(tableName);
+    sb.append(" AS ");   // needed for hsqldb; doesn't hurt anyone else.
+    sb.append(tableName);
+
+    return execute(sb.toString());
+  }
+
+  @Override
+  public String[] listDatabases() {
+    // TODO(aaron): Implement this!
+    LOG.error("Generic SqlManager.listDatabases() not implemented.");
+    return null;
+  }
+
+  @Override
+  public String[] listTables() {
+    ResultSet results = null;
+    String [] tableTypes = {"TABLE"};
+    try {
+      DatabaseMetaData metaData = this.getConnection().getMetaData();
+      results = metaData.getTables(null, null, null, tableTypes);
+    } catch (SQLException sqlException) {
+      LOG.error("Error reading database metadata: " + sqlException.toString());
+      return null;
+    }
+
+    if (null == results) {
+      return null;
+    }
+
+    try {
+      ArrayList<String> tables = new ArrayList<String>();
+      while (results.next()) {
+        String tableName = results.getString("TABLE_NAME");
+        tables.add(tableName);
+      }
+
+      return tables.toArray(new String[0]);
+    } catch (SQLException sqlException) {
+      LOG.error("Error reading from database: " + sqlException.toString());
+      return null;
+    }
+  }
+
+  @Override
+  public String getPrimaryKey(String tableName) {
+    try {
+      DatabaseMetaData metaData = this.getConnection().getMetaData();
+      ResultSet results = metaData.getPrimaryKeys(null, null, tableName);
+      if (null == results) {
+        return null;
+      }
+
+      if (results.next()) {
+        return results.getString("COLUMN_NAME");
+      }
+    } catch (SQLException sqlException) {
+      LOG.error("Error reading primary key metadata: " + sqlException.toString());
+      return null;
+    }
+
+    return null;
+  }
+
+  /**
+   * Retrieve the actual database connection; implemented by a concrete subclass.
+   */
+  public abstract Connection getConnection() throws SQLException;
+
+  /**
+   * Default implementation of importTable() is to launch a MapReduce job
+   * via ImportJob to read the table with DBInputFormat.
+   */
+  public void importTable(String tableName, String jarFile, Configuration conf)
+      throws IOException, ImportError {
+    ImportJob importer = new ImportJob(options);
+    String orderCol = options.getOrderByCol();
+    if (null == orderCol) {
+      // If the user didn't specify an ordering column, try to infer one.
+      orderCol = getPrimaryKey(tableName);
+    }
+
+    if (null == orderCol) {
+      // Can't infer a primary key.
+      throw new ImportError("No primary key could be found for table " + tableName
+          + ". Please specify one with --order-by.");
+    }
+
+    importer.runImport(tableName, jarFile, orderCol, conf);
+  }
+
+  /**
+   * Executes an arbitrary SQL statement.
+   * @param stmt The SQL statement to execute
+   * @param args Optional arguments to bind to '?' parameters in the statement
+   * @return A ResultSet encapsulating the results, or null on error
+   */
+  protected ResultSet execute(String stmt, Object... args) {
+    if (null == stmt) {
+      LOG.error("Null statement sent to SqlManager.execute()");
+      return null;
+    }
+
+    PreparedStatement statement = null;
+    try {
+      statement = this.getConnection().prepareStatement(stmt,
+          ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+      if (null != args) {
+        for (int i = 0; i < args.length; i++) {
+          statement.setObject(i + 1, args[i]);
+        }
+      }
+
+      LOG.info("Executing SQL statement: " + stmt);
+      return statement.executeQuery();
+    } catch (SQLException sqlException) {
+      LOG.error("Error returned by SQL database: " + sqlException.toString());
+      return null;
+    }
+
+    // TODO(aaron): Is calling ResultSet.close() sufficient?
+    // Or must statement.close() be called too?
+  }
+
+  public String toJavaType(int sqlType) {
+    // mappings from http://java.sun.com/j2se/1.3/docs/guide/jdbc/getstart/mapping.html
+    if (sqlType == Types.INTEGER) {
+      return "Integer";
+    } else if (sqlType == Types.VARCHAR) {
+      return "String";
+    } else if (sqlType == Types.CHAR) {
+      return "String";
+    } else if (sqlType == Types.LONGVARCHAR) {
+      return "String";
+    } else if (sqlType == Types.NUMERIC) {
+      return "java.math.BigDecimal";
+    } else if (sqlType == Types.DECIMAL) {
+      return "java.math.BigDecimal";
+    } else if (sqlType == Types.BIT) {
+      return "Boolean";
+    } else if (sqlType == Types.BOOLEAN) {
+      return "Boolean";
+    } else if (sqlType == Types.TINYINT) {
+      return "Integer";
+    } else if (sqlType == Types.SMALLINT) {
+      return "Integer";
+    } else if (sqlType == Types.BIGINT) {
+      return "Long";
+    } else if (sqlType == Types.REAL) {
+      return "Float";
+    } else if (sqlType == Types.FLOAT) {
+      return "Double";
+    } else if (sqlType == Types.DOUBLE) {
+      return "Double";
+    } else if (sqlType == Types.DATE) {
+      return "java.sql.Date";
+    } else if (sqlType == Types.TIME) {
+      return "java.sql.Time";
+    } else if (sqlType == Types.TIMESTAMP) {
+      return "java.sql.Timestamp";
+    } else {
+      // TODO(aaron): Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT, CLOB, BLOB, ARRAY,
+      // STRUCT, REF, JAVA_OBJECT.
+      return null;
+    }
+  }
+
+
+  public void close() throws SQLException {
+  }
+
+  /**
+   * Poor man's SQL query interface; used for debugging.
+   * @param s the SQL statement to execute and print
+   */
+  public void execAndPrint(String s) {
+    System.out.println("Executing statement: " + s);
+    ResultSet results = execute(s);
+    if (null == results) {
+      LOG.info("Got null results back!");
+      return;
+    }
+
+    try {
+      int cols = results.getMetaData().getColumnCount();
+      System.out.println("Got " + cols + " columns back");
+      if (cols > 0) {
+        System.out.println("Schema: " + results.getMetaData().getSchemaName(1));
+        System.out.println("Table: " + results.getMetaData().getTableName(1));
+      }
+    } catch (SQLException sqlE) {
+      LOG.error("SQLException reading result metadata: " + sqlE.toString());
+    }
+
+    try {
+      new ResultSetPrinter().printResultSet(System.out, results);
+    } catch (IOException ioe) {
+      LOG.error("IOException writing results to stdout: " + ioe.toString());
+      return;
+    }
+  }
+
+  /**
+   * Create a connection to the database; usually used only from within
+   * getConnection(), which enforces a singleton guarantee around the
+   * Connection object.
+   */
+  protected Connection makeConnection() throws SQLException {
+
+    Connection connection;
+    String driverClass = getDriverClass();
+
+    try {
+      Class.forName(driverClass);
+    } catch (ClassNotFoundException cnfe) {
+      throw new RuntimeException("Could not load db driver class: " + driverClass);
+    }
+
+    String username = options.getUsername();
+    String password = options.getPassword();
+    if (null == username) {
+      connection = DriverManager.getConnection(options.getConnectString());
+    } else {
+      connection = DriverManager.getConnection(options.getConnectString(), username, password);
+    }
+
+    connection.setAutoCommit(false);
+    connection.setTransactionIsolation(Connection.TRANSACTION_SERIALIZABLE);
+
+    return connection;
+  }
+}
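
The protected execute(stmt, args...) helper above binds any trailing arguments to '?'
placeholders via PreparedStatement.setObject(). A hypothetical subclass, not part of this
patch, showing a parameterized query built on that helper:

    package org.apache.hadoop.sqoop.manager;

    import java.sql.ResultSet;
    import java.sql.SQLException;

    import org.apache.hadoop.sqoop.ImportOptions;

    /**
     * Hypothetical subclass illustrating the parameterized execute() helper.
     */
    public class ExampleManager extends GenericJdbcManager {

      public ExampleManager(final ImportOptions opts) {
        super("org.hsqldb.jdbcDriver", opts);
      }

      /** Count rows of 'tableName' whose 'id' column exceeds minId; returns -1 on error. */
      public long countRowsAbove(String tableName, int minId) {
        ResultSet results = execute(
            "SELECT COUNT(*) FROM " + tableName + " WHERE id > ?",
            Integer.valueOf(minId));
        if (null == results) {
          return -1;
        }
        try {
          return results.next() ? results.getLong(1) : -1;
        } catch (SQLException sqlException) {
          LOG.error("Error reading count: " + sqlException.toString());
          return -1;
        }
      }
    }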