You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ja...@apache.org on 2013/01/31 18:27:23 UTC
git commit: SQOOP-830: HBase import formatting BigDecimal inconsistently
Updated Branches:
refs/heads/trunk 0488503a3 -> 7c5b46fb2
SQOOP-830: HBase import formatting BigDecimal inconsistently
(David Robson via Jarek Jarcec Cecho)
Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/7c5b46fb
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/7c5b46fb
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/7c5b46fb
Branch: refs/heads/trunk
Commit: 7c5b46fb2860e7401e84542a01a61ef415cbe519
Parents: 0488503
Author: Jarek Jarcec Cecho <ja...@apache.org>
Authored: Thu Jan 31 09:26:42 2013 -0800
Committer: Jarek Jarcec Cecho <ja...@apache.org>
Committed: Thu Jan 31 09:26:42 2013 -0800
----------------------------------------------------------------------
src/docs/user/import.txt | 34 +++++
.../org/apache/sqoop/hbase/HBasePutProcessor.java | 7 +
.../apache/sqoop/hbase/ToStringPutTransformer.java | 16 ++-
.../apache/sqoop/mapreduce/AvroImportMapper.java | 16 ++-
.../org/apache/sqoop/mapreduce/ImportJobBase.java | 8 +
src/java/org/apache/sqoop/orm/ClassWriter.java | 11 ++
.../cloudera/sqoop/testutil/BaseSqoopTestCase.java | 54 ++++---
.../org/apache/sqoop/TestBigDecimalExport.java | 112 +++++++++++++++
.../org/apache/sqoop/TestBigDecimalImport.java | 86 +++++++++++
9 files changed, 315 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/docs/user/import.txt
----------------------------------------------------------------------
diff --git a/src/docs/user/import.txt b/src/docs/user/import.txt
index 82e74dd..9bc4fc9 100644
--- a/src/docs/user/import.txt
+++ b/src/docs/user/import.txt
@@ -575,6 +575,40 @@ $ sqoop import --table SomeTable --jar-file mydatatypes.jar \
This command will load the +SomeTableType+ class out of +mydatatypes.jar+.
+Additional Import Configuration Properties
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+There are some additional properties which can be configured by modifying
++conf/sqoop-site.xml+. Properties can be specified the same as in Hadoop
+configuration files, for example:
+
+----
+ <property>
+ <name>property.name</name>
+ <value>property.value</value>
+ </property>
+----
+
+They can also be specified on the command line in the generic arguments, for
+example:
+
+----
+sqoop import -D property.name=property.value ...
+----
+
+.Additional import configuration properties:
+[grid="all"]
+`-------------------------------------`----------------------------------------
+Argument Description
+-------------------------------------------------------------------------------
++sqoop.bigdecimal.format.string+ Controls how BigDecimal columns will \
+ be formatted when stored as a String. A \
+ value of +true+ (default) will use \
+ toPlainString to store them without an \
+ exponent component (0.0000001); while \
+ a value of +false+ will use toString \
+ which may include an exponent (1E-7)
+-------------------------------------------------------------------------------
+
Example Invocations
~~~~~~~~~~~~~~~~~~~
http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
index 64a1d18..cca641f 100644
--- a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
+++ b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.sqoop.mapreduce.ImportJobBase;
import com.cloudera.sqoop.lib.FieldMappable;
import com.cloudera.sqoop.lib.FieldMapProcessor;
@@ -90,6 +91,12 @@ public class HBasePutProcessor implements Closeable, Configurable,
this.putTransformer.setColumnFamily(conf.get(COL_FAMILY_KEY, null));
this.putTransformer.setRowKeyColumn(conf.get(ROW_KEY_COLUMN_KEY, null));
+ if (this.putTransformer instanceof ToStringPutTransformer) {
+ ((ToStringPutTransformer) this.putTransformer).bigDecimalFormatString =
+ conf.getBoolean(ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
+ ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
+ }
+
this.tableName = conf.get(TABLE_NAME_KEY, null);
try {
this.table = new HTable(conf, this.tableName);
http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java b/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java
index 1f52ba9..131fd43 100644
--- a/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java
+++ b/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java
@@ -19,6 +19,7 @@
package org.apache.sqoop.hbase;
import java.io.IOException;
+import java.math.BigDecimal;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -42,6 +43,7 @@ public class ToStringPutTransformer extends PutTransformer {
// A mapping from field name -> bytes for that field name.
// Used to cache serialization work done for fields names.
private Map<String, byte[]> serializedFieldNames;
+ protected boolean bigDecimalFormatString;
public ToStringPutTransformer() {
serializedFieldNames = new TreeMap<String, byte[]>();
@@ -81,7 +83,7 @@ public class ToStringPutTransformer extends PutTransformer {
return null;
}
- Put put = new Put(Bytes.toBytes(rowKey.toString()));
+ Put put = new Put(Bytes.toBytes(toHBaseString(rowKey)));
for (Map.Entry<String, Object> fieldEntry : fields.entrySet()) {
String colName = fieldEntry.getKey();
@@ -91,7 +93,7 @@ public class ToStringPutTransformer extends PutTransformer {
Object val = fieldEntry.getValue();
if (null != val) {
put.add(colFamilyBytes, getFieldNameBytes(colName),
- Bytes.toBytes(val.toString()));
+ Bytes.toBytes(toHBaseString(val)));
}
}
}
@@ -99,4 +101,14 @@ public class ToStringPutTransformer extends PutTransformer {
return Collections.singletonList(put);
}
+ private String toHBaseString(Object val) {
+ String valString;
+ if (val instanceof BigDecimal && bigDecimalFormatString) {
+ valString = ((BigDecimal) val).toPlainString();
+ } else {
+ valString = val.toString();
+ }
+ return valString;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java b/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java
index 30db288..289eb28 100644
--- a/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java
+++ b/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java
@@ -30,6 +30,7 @@ import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroWrapper;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
@@ -51,13 +52,18 @@ public class AvroImportMapper
new AvroWrapper<GenericRecord>();
private Schema schema;
private LargeObjectLoader lobLoader;
+ private boolean bigDecimalFormatString;
@Override
protected void setup(Context context)
throws IOException, InterruptedException {
- schema = AvroJob.getMapOutputSchema(context.getConfiguration());
- lobLoader = new LargeObjectLoader(context.getConfiguration(),
+ Configuration conf = context.getConfiguration();
+ schema = AvroJob.getMapOutputSchema(conf);
+ lobLoader = new LargeObjectLoader(conf,
FileOutputFormat.getWorkOutputPath(context));
+ bigDecimalFormatString = conf.getBoolean(
+ ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
+ ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
}
@Override
@@ -99,7 +105,11 @@ public class AvroImportMapper
*/
private Object toAvro(Object o) {
if (o instanceof BigDecimal) {
- return o.toString();
+ if (bigDecimalFormatString) {
+ return ((BigDecimal)o).toPlainString();
+ } else {
+ return o.toString();
+ }
} else if (o instanceof Date) {
return ((Date) o).getTime();
} else if (o instanceof Time) {
http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java b/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java
index f6e2e72..f766532 100644
--- a/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java
+++ b/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java
@@ -58,6 +58,14 @@ public class ImportJobBase extends JobBase {
public static final Log LOG = LogFactory.getLog(
ImportJobBase.class.getName());
+ /** Controls how java.math.BigDecimal values are converted to Strings.
+ * If set to true (default), the toPlainString() method is called.
+ * If set to false, the toString() method is called.
+ */
+ public static final String PROPERTY_BIGDECIMAL_FORMAT =
+ "sqoop.bigdecimal.format.string";
+ public static final boolean PROPERTY_BIGDECIMAL_FORMAT_DEFAULT = true;
+
public ImportJobBase() {
this(null);
}
http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/orm/ClassWriter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/orm/ClassWriter.java b/src/java/org/apache/sqoop/orm/ClassWriter.java
index 126b406..136982c 100644
--- a/src/java/org/apache/sqoop/orm/ClassWriter.java
+++ b/src/java/org/apache/sqoop/orm/ClassWriter.java
@@ -33,6 +33,7 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.BytesWritable;
+import org.apache.sqoop.mapreduce.ImportJobBase;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.manager.ConnManager;
@@ -116,6 +117,7 @@ public class ClassWriter {
private ConnManager connManager;
private String tableName;
private CompilationManager compileManager;
+ private boolean bigDecimalFormatString;
/**
* Creates a new ClassWriter to generate an ORM class for a table
@@ -131,6 +133,9 @@ public class ClassWriter {
this.connManager = connMgr;
this.tableName = table;
this.compileManager = compMgr;
+ this.bigDecimalFormatString = this.options.getConf().getBoolean(
+ ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
+ ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
}
/**
@@ -317,6 +322,12 @@ public class ClassWriter {
String r = colName + "==null?\"" + this.options.getNullStringValue()
+ "\":" + colName;
return r;
+ } else if (javaType.equals("java.math.BigDecimal")
+ && this.bigDecimalFormatString) {
+ // Use toPlainString method for BigDecimals if option is set
+ String r = colName + "==null?\"" + this.options.getNullNonStringValue()
+ + "\":" + colName + ".toPlainString()";
+ return r;
} else {
// This is an object type -- just call its toString() in a null-safe way.
// Also check if it is null, and instead write the null representation
http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java b/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java
index f9370c4..cf41b96 100644
--- a/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java
+++ b/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java
@@ -312,13 +312,9 @@ public abstract class BaseSqoopTestCase extends TestCase {
for (int i = 0; i < colTypes.length; i++) {
String colName = BASE_COL_NAME + Integer.toString(i);
columnDefStr += colName + " " + colTypes[i];
- columnListStr += colName;
- valueListStr += vals[i];
myColNames[i] = colName;
if (i < colTypes.length - 1) {
columnDefStr += ", ";
- columnListStr += ", ";
- valueListStr += ", ";
}
}
@@ -344,27 +340,37 @@ public abstract class BaseSqoopTestCase extends TestCase {
}
}
- try {
- String insertValsStr = "INSERT INTO " + getTableName()
- + "(" + columnListStr + ")"
- + " VALUES(" + valueListStr + ")";
- LOG.info("Inserting values: " + insertValsStr);
- statement = conn.prepareStatement(
- insertValsStr,
- ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
- statement.executeUpdate();
- } catch (SQLException sqlException) {
- fail("Could not create table: "
- + StringUtils.stringifyException(sqlException));
- } finally {
- if (null != statement) {
- try {
- statement.close();
- } catch (SQLException se) {
- // Ignore exception on close.
+ if (vals!=null) {
+ for (int i = 0; i < colTypes.length; i++) {
+ columnListStr += myColNames[i];
+ valueListStr += vals[i];
+ if (i < colTypes.length - 1) {
+ columnListStr += ", ";
+ valueListStr += ", ";
+ }
+ }
+ try {
+ String insertValsStr = "INSERT INTO " + getTableName()
+ + "(" + columnListStr + ")"
+ + " VALUES(" + valueListStr + ")";
+ LOG.info("Inserting values: " + insertValsStr);
+ statement = conn.prepareStatement(
+ insertValsStr,
+ ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+ statement.executeUpdate();
+ } catch (SQLException sqlException) {
+ fail("Could not create table: "
+ + StringUtils.stringifyException(sqlException));
+ } finally {
+ if (null != statement) {
+ try {
+ statement.close();
+ } catch (SQLException se) {
+ // Ignore exception on close.
+ }
+
+ statement = null;
}
-
- statement = null;
}
}
http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/test/org/apache/sqoop/TestBigDecimalExport.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/sqoop/TestBigDecimalExport.java b/src/test/org/apache/sqoop/TestBigDecimalExport.java
new file mode 100644
index 0000000..80cdad5
--- /dev/null
+++ b/src/test/org/apache/sqoop/TestBigDecimalExport.java
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.sqoop;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.math.BigDecimal;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.cloudera.sqoop.testutil.CommonArgs;
+import com.cloudera.sqoop.testutil.ExportJobTestCase;
+
+/**
+ * Test exporting lines that are created via both options of the
+ * sqoop.bigdecimal.format.string parameter.
+ */
+public class TestBigDecimalExport extends ExportJobTestCase {
+
+ private void runBigDecimalExport(String line)
+ throws IOException, SQLException {
+ FileSystem fs = FileSystem.get(getConf());
+ Path tablePath = getTablePath();
+ fs.mkdirs(tablePath);
+ Path filePath = getDataFilePath();
+ DataOutputStream stream = fs.create(filePath);
+ BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream));
+ writer.write(line);
+ writer.close();
+ String[] types =
+ { "DECIMAL", "NUMERIC" };
+ createTableWithColTypes(types, null);
+
+ List<String> args = new ArrayList<String>();
+
+ CommonArgs.addHadoopFlags(args);
+
+ args.add("--table");
+ args.add(getTableName());
+ args.add("--export-dir");
+ args.add(tablePath.toString());
+ args.add("--connect");
+ args.add(getConnectString());
+ args.add("-m");
+ args.add("1");
+
+ runExport(args.toArray(new String[args.size()]));
+
+ BigDecimal actual1 = null;
+ BigDecimal actual2 = null;
+
+ Connection conn = getConnection();
+ try {
+ PreparedStatement stmt = conn.prepareStatement("SELECT * FROM "
+ + getTableName());
+ try {
+ ResultSet rs = stmt.executeQuery();
+ try {
+ rs.next();
+ actual1 = rs.getBigDecimal(1);
+ actual2 = rs.getBigDecimal(2);
+ } finally {
+ rs.close();
+ }
+ } finally {
+ stmt.close();
+ }
+ } finally {
+ conn.close();
+ }
+
+ BigDecimal expected1 = new BigDecimal("0.000001");
+ BigDecimal expected2 = new BigDecimal("0.0000001");
+
+ assertEquals(expected1, actual1);
+ assertEquals(expected2, actual2);
+ }
+
+ public void testBigDecimalDefault() throws IOException, SQLException {
+ runBigDecimalExport("0.000001,0.0000001");
+ }
+
+ public void testBigDecimalNoFormat() throws IOException, SQLException {
+ runBigDecimalExport("0.000001,1E-7");
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/test/org/apache/sqoop/TestBigDecimalImport.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/sqoop/TestBigDecimalImport.java b/src/test/org/apache/sqoop/TestBigDecimalImport.java
new file mode 100644
index 0000000..76e4704
--- /dev/null
+++ b/src/test/org/apache/sqoop/TestBigDecimalImport.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.sqoop;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.cloudera.sqoop.testutil.CommonArgs;
+import com.cloudera.sqoop.testutil.ImportJobTestCase;
+
+/**
+ * Test the sqoop.bigdecimal.format.string parameter default behavior and when
+ * set to false.
+ */
+public class TestBigDecimalImport extends ImportJobTestCase {
+
+ private String runBigDecimalImport(List<String> extraArgs)
+ throws IOException {
+ String[] types =
+ { "DECIMAL", "NUMERIC" };
+ String[] vals = { "0.000001", "0.0000001" };
+ createTableWithColTypes(types, vals);
+ List<String> args = new ArrayList<String>();
+
+ CommonArgs.addHadoopFlags(args);
+
+ if (extraArgs!=null) {
+ args.addAll(extraArgs);
+ }
+ args.add("--table");
+ args.add(getTableName());
+ args.add("--warehouse-dir");
+ args.add(getWarehouseDir());
+ args.add("--connect");
+ args.add(getConnectString());
+ args.add("-m");
+ args.add("1");
+
+ runImport(args.toArray(new String[args.size()]));
+
+ Path outputFile = getDataFilePath();
+ FileSystem fs = FileSystem.get(getConf());
+ DataInputStream stream = fs.open(outputFile);
+ BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
+ String line = reader.readLine();
+ reader.close();
+ return line;
+ }
+
+ public void testBigDecimalDefault() throws IOException {
+ String line = runBigDecimalImport(null);
+ assertEquals("0.000001,0.0000001", line);
+ }
+
+ public void testBigDecimalNoFormat() throws IOException {
+ List<String> args = new ArrayList<String>();
+ args.add("-Dsqoop.bigdecimal.format.string=false");
+
+ String line = runBigDecimalImport(args);
+ assertEquals("0.000001,1E-7", line);
+ }
+
+}