You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/02/13 00:17:30 UTC
spark git commit: [SPARK-5758][SQL] Use LongType as the default type for integers in JSON schema inference.
Repository: spark
Updated Branches:
refs/heads/master 0bf031582 -> c352ffbdb
[SPARK-5758][SQL] Use LongType as the default type for integers in JSON schema inference.
Author: Yin Huai <yh...@databricks.com>
Closes #4544 from yhuai/jsonUseLongTypeByDefault and squashes the following commits:
6e2ffc2 [Yin Huai] Use LongType as the default type for integers in JSON schema inference.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c352ffbd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c352ffbd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c352ffbd
Branch: refs/heads/master
Commit: c352ffbdb9112714c176a747edff6115e9369e58
Parents: 0bf0315
Author: Yin Huai <yh...@databricks.com>
Authored: Thu Feb 12 15:17:25 2015 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Thu Feb 12 15:17:25 2015 -0800
----------------------------------------------------------------------
.../scala/org/apache/spark/sql/json/JsonRDD.scala | 12 ++++++++----
.../spark/sql/api/java/JavaApplySchemaSuite.java | 2 +-
.../scala/org/apache/spark/sql/json/JsonSuite.scala | 16 ++++++++--------
3 files changed, 17 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/c352ffbd/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
index 1043eef..7dfb304 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
@@ -17,13 +17,12 @@
package org.apache.spark.sql.json
-import java.io.StringWriter
-import java.sql.{Date, Timestamp}
+import java.sql.Timestamp
import scala.collection.Map
import scala.collection.convert.Wrappers.{JMapWrapper, JListWrapper}
-import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException, JsonFactory}
+import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException}
import com.fasterxml.jackson.databind.ObjectMapper
import org.apache.spark.rdd.RDD
@@ -178,7 +177,12 @@ private[sql] object JsonRDD extends Logging {
}
private def typeOfPrimitiveValue: PartialFunction[Any, DataType] = {
- ScalaReflection.typeOfObject orElse {
+ // For Integer values, use LongType by default.
+ val useLongType: PartialFunction[Any, DataType] = {
+ case value: IntegerType.JvmType => LongType
+ }
+
+ useLongType orElse ScalaReflection.typeOfObject orElse {
// Since we do not have a data type backed by BigInteger,
// when we see a Java BigInteger, we use DecimalType.
case value: java.math.BigInteger => DecimalType.Unlimited
http://git-wip-us.apache.org/repos/asf/spark/blob/c352ffbd/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaApplySchemaSuite.java
----------------------------------------------------------------------
diff --git a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaApplySchemaSuite.java b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaApplySchemaSuite.java
index 2e6e977..643b891 100644
--- a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaApplySchemaSuite.java
+++ b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaApplySchemaSuite.java
@@ -164,7 +164,7 @@ public class JavaApplySchemaSuite implements Serializable {
fields.add(DataTypes.createStructField("bigInteger", DataTypes.createDecimalType(), true));
fields.add(DataTypes.createStructField("boolean", DataTypes.BooleanType, true));
fields.add(DataTypes.createStructField("double", DataTypes.DoubleType, true));
- fields.add(DataTypes.createStructField("integer", DataTypes.IntegerType, true));
+ fields.add(DataTypes.createStructField("integer", DataTypes.LongType, true));
fields.add(DataTypes.createStructField("long", DataTypes.LongType, true));
fields.add(DataTypes.createStructField("null", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("string", DataTypes.StringType, true));
http://git-wip-us.apache.org/repos/asf/spark/blob/c352ffbd/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
index fde4b47..b5f13f8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
@@ -222,7 +222,7 @@ class JsonSuite extends QueryTest {
StructField("bigInteger", DecimalType.Unlimited, true) ::
StructField("boolean", BooleanType, true) ::
StructField("double", DoubleType, true) ::
- StructField("integer", IntegerType, true) ::
+ StructField("integer", LongType, true) ::
StructField("long", LongType, true) ::
StructField("null", StringType, true) ::
StructField("string", StringType, true) :: Nil)
@@ -252,7 +252,7 @@ class JsonSuite extends QueryTest {
StructField("arrayOfBigInteger", ArrayType(DecimalType.Unlimited, false), true) ::
StructField("arrayOfBoolean", ArrayType(BooleanType, false), true) ::
StructField("arrayOfDouble", ArrayType(DoubleType, false), true) ::
- StructField("arrayOfInteger", ArrayType(IntegerType, false), true) ::
+ StructField("arrayOfInteger", ArrayType(LongType, false), true) ::
StructField("arrayOfLong", ArrayType(LongType, false), true) ::
StructField("arrayOfNull", ArrayType(StringType, true), true) ::
StructField("arrayOfString", ArrayType(StringType, false), true) ::
@@ -265,7 +265,7 @@ class JsonSuite extends QueryTest {
StructField("field1", BooleanType, true) ::
StructField("field2", DecimalType.Unlimited, true) :: Nil), true) ::
StructField("structWithArrayFields", StructType(
- StructField("field1", ArrayType(IntegerType, false), true) ::
+ StructField("field1", ArrayType(LongType, false), true) ::
StructField("field2", ArrayType(StringType, false), true) :: Nil), true) :: Nil)
assert(expectedSchema === jsonDF.schema)
@@ -486,7 +486,7 @@ class JsonSuite extends QueryTest {
val jsonDF = jsonRDD(complexFieldValueTypeConflict)
val expectedSchema = StructType(
- StructField("array", ArrayType(IntegerType, false), true) ::
+ StructField("array", ArrayType(LongType, false), true) ::
StructField("num_struct", StringType, true) ::
StructField("str_array", StringType, true) ::
StructField("struct", StructType(
@@ -540,7 +540,7 @@ class JsonSuite extends QueryTest {
val expectedSchema = StructType(
StructField("a", BooleanType, true) ::
StructField("b", LongType, true) ::
- StructField("c", ArrayType(IntegerType, false), true) ::
+ StructField("c", ArrayType(LongType, false), true) ::
StructField("d", StructType(
StructField("field", BooleanType, true) :: Nil), true) ::
StructField("e", StringType, true) :: Nil)
@@ -560,7 +560,7 @@ class JsonSuite extends QueryTest {
StructField("bigInteger", DecimalType.Unlimited, true) ::
StructField("boolean", BooleanType, true) ::
StructField("double", DoubleType, true) ::
- StructField("integer", IntegerType, true) ::
+ StructField("integer", LongType, true) ::
StructField("long", LongType, true) ::
StructField("null", StringType, true) ::
StructField("string", StringType, true) :: Nil)
@@ -781,12 +781,12 @@ class JsonSuite extends QueryTest {
ArrayType(ArrayType(ArrayType(ArrayType(StringType, false), false), true), false), true) ::
StructField("field2",
ArrayType(ArrayType(
- StructType(StructField("Test", IntegerType, true) :: Nil), false), true), true) ::
+ StructType(StructField("Test", LongType, true) :: Nil), false), true), true) ::
StructField("field3",
ArrayType(ArrayType(
StructType(StructField("Test", StringType, true) :: Nil), true), false), true) ::
StructField("field4",
- ArrayType(ArrayType(ArrayType(IntegerType, false), true), false), true) :: Nil)
+ ArrayType(ArrayType(ArrayType(LongType, false), true), false), true) :: Nil)
assert(schema === jsonDF.schema)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org