You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ja...@apache.org on 2013/10/23 21:01:17 UTC

git commit: SQOOP-435: Avro import should write the Schema to a file

Updated Branches:
  refs/heads/trunk 840711812 -> a555a1f31


SQOOP-435: Avro import should write the Schema to a file

(James Anderson via Jarek Jarcec Cecho)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/a555a1f3
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/a555a1f3
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/a555a1f3

Branch: refs/heads/trunk
Commit: a555a1f31a5546083a9fb0c3e9af17ad996d5d18
Parents: 8407118
Author: Jarek Jarcec Cecho <ja...@apache.org>
Authored: Wed Oct 23 11:59:55 2013 -0700
Committer: Jarek Jarcec Cecho <ja...@apache.org>
Committed: Wed Oct 23 11:59:55 2013 -0700

----------------------------------------------------------------------
 .../sqoop/mapreduce/DataDrivenImportJob.java    | 27 ++++++++++++++++++++
 src/test/com/cloudera/sqoop/TestAvroImport.java |  8 ++++++
 2 files changed, 35 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/a555a1f3/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java b/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
index 5afd90c..172e822 100644
--- a/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
+++ b/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
@@ -18,9 +18,12 @@
 
 package org.apache.sqoop.mapreduce;
 
+import java.io.File;
 import java.io.IOException;
 import java.sql.SQLException;
+
 import org.apache.avro.Schema;
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.LongWritable;
@@ -32,6 +35,7 @@ import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities;
+
 import com.cloudera.sqoop.SqoopOptions;
 import com.cloudera.sqoop.config.ConfigurationHelper;
 import com.cloudera.sqoop.lib.LargeObjectLoader;
@@ -83,12 +87,35 @@ public class DataDrivenImportJob extends ImportJobBase {
       AvroSchemaGenerator generator = new AvroSchemaGenerator(options,
           connManager, tableName);
       Schema schema = generator.generate();
+
+      try {
+        writeAvroSchema(schema);
+      } catch (final IOException e) {
+        LOG.error("Error while writing Avro schema.", e);
+      }
+
       AvroJob.setMapOutputSchema(job.getConfiguration(), schema);
     }
 
     job.setMapperClass(getMapperClass());
   }
 
+  /** Writes the schema as UTF-8 JSON to "NAME.avsc", then moves it to the code output dir. */
+  private void writeAvroSchema(final Schema schema) throws IOException {
+    // Generate schema in JAR output directory.
+    final File schemaFile = new File(options.getJarOutputDir(), schema.getName() + ".avsc");
+    LOG.info("Writing Avro schema file: " + schemaFile);
+    FileUtils.forceMkdir(schemaFile.getParentFile());
+    // Avro schemas are JSON, so pin UTF-8 instead of relying on the platform default charset.
+    FileUtils.writeStringToFile(schemaFile, schema.toString(true), "UTF-8");
+
+    // Best effort: move (not copy) the schema to the code output dir; failure is only logged.
+    try {
+      FileUtils.moveFileToDirectory(schemaFile, new File(options.getCodeOutputDir()), true);
+    } catch (final IOException e) {
+      LOG.debug("Could not move Avro schema file to code output directory.", e);
+    }
+  }
+
   @Override
   protected Class<? extends Mapper> getMapperClass() {
     if (options.getHCatTableName() != null) {

http://git-wip-us.apache.org/repos/asf/sqoop/blob/a555a1f3/src/test/com/cloudera/sqoop/TestAvroImport.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/TestAvroImport.java b/src/test/com/cloudera/sqoop/TestAvroImport.java
index 34a7d41..440e76c 100644
--- a/src/test/com/cloudera/sqoop/TestAvroImport.java
+++ b/src/test/com/cloudera/sqoop/TestAvroImport.java
@@ -18,6 +18,7 @@
 
 package com.cloudera.sqoop;
 
+import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.sql.SQLException;
@@ -157,6 +158,8 @@ public class TestAvroImport extends ImportJobTestCase {
     if (codec != null) {
       assertEquals(codec, reader.getMetaString(DataFileConstants.CODEC));
     }
+
+    checkSchemaFile(schema);
   }
 
   public void testOverrideTypeMapping() throws IOException {
@@ -235,4 +238,9 @@ public class TestAvroImport extends ImportJobTestCase {
     return new DataFileReader<GenericRecord>(fsInput, datumReader);
   }
 
+  private void checkSchemaFile(final Schema schema) throws IOException {
+    final File avsc = new File(schema.getName() + ".avsc"); // relative to the working directory
+    assertTrue(avsc.exists());
+    assertEquals(schema, new Schema.Parser().parse(avsc));
+  }
 }