Posted to commits@spark.apache.org by do...@apache.org on 2019/07/15 19:13:06 UTC

[spark] branch master updated: [SPARK-28152][SQL] Mapped ShortType to SMALLINT and FloatType to REAL for MsSqlServerDialect

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new d8996fd  [SPARK-28152][SQL] Mapped ShortType to SMALLINT and FloatType to REAL for MsSqlServerDialect
d8996fd is described below

commit d8996fd94056948529bd9d6fca7a36fe91e85eae
Author: shivsood <sh...@microsoft.com>
AuthorDate: Mon Jul 15 12:12:36 2019 -0700

    [SPARK-28152][SQL] Mapped ShortType to SMALLINT and FloatType to REAL for MsSqlServerDialect
    
    ## What changes were proposed in this pull request?
    This PR corrects the type mappings in `MsSqlServerDialect`: `ShortType` is now mapped to `SMALLINT` and `FloatType` to `REAL`, per the [JDBC mapping](https://docs.microsoft.com/en-us/sql/connect/jdbc/using-basic-data-types?view=sql-server-2017).
    
    ShortType and FloatType are not correctly mapped to the right JDBC types when using the JDBC connector. This results in tables and Spark data frames being created with unintended types. The issue was observed when validating against SQL Server.
    
    Refer to the [JDBC mapping](https://docs.microsoft.com/en-us/sql/connect/jdbc/using-basic-data-types?view=sql-server-2017) for guidance on mappings between SQL Server, JDBC, and Java. Note that the Java `Short` type should be mapped to JDBC `SMALLINT` and the Java `Float` type should be mapped to JDBC `REAL`.
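    
    As a quick illustration of the corrected mappings, a minimal sketch mirroring the unit test added in this PR (assumes a Spark test classpath where `JdbcDialects` is available):
    
    ```scala
    import org.apache.spark.sql.jdbc.JdbcDialects
    import org.apache.spark.sql.types.{FloatType, MetadataBuilder, ShortType}
    
    val msSqlServerDialect = JdbcDialects.get("jdbc:sqlserver")
    
    // Write path: Catalyst ShortType now maps to SQL Server SMALLINT
    // (previously it fell through to the default INTEGER mapping).
    assert(msSqlServerDialect.getJDBCType(ShortType)
      .map(_.databaseTypeDefinition).get == "SMALLINT")
    
    // Read path: JDBC SMALLINT and REAL now map back to ShortType and
    // FloatType (previously IntegerType and DoubleType).
    val metadata = new MetadataBuilder().putLong("scale", 1)
    assert(msSqlServerDialect.getCatalystType(
      java.sql.Types.SMALLINT, "SMALLINT", 1, metadata).get == ShortType)
    assert(msSqlServerDialect.getCatalystType(
      java.sql.Types.REAL, "REAL", 1, metadata).get == FloatType)
    ```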
    
    Some example issues that can happen because of the wrong mappings:
        - A write from a df with a ShortType column results in a SQL table whose column type is INTEGER as opposed to SMALLINT, and thus a larger table than expected.
        - A read results in a dataframe with type INTEGER as opposed to ShortType.
    
    - ShortType has a problem in both the write and read paths.
    - FloatType only has an issue in the read path. In the write path, the Spark data type 'FloatType' is correctly mapped to the JDBC equivalent data type 'REAL'. But in the read path, when JDBC data types are converted to Catalyst data types (getCatalystType), 'REAL' is incorrectly mapped to 'DoubleType' rather than 'FloatType'. See the read-path sketch below.
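    
    A hedged sketch of what the read-path symptom looks like from a user's perspective (the connection URL, database, table, and column names here are hypothetical):
    
    ```scala
    import org.apache.spark.sql.SparkSession
    
    val spark = SparkSession.builder().appName("repro").getOrCreate()
    
    // Hypothetical SQL Server table dbo.sample_table with a REAL
    // column "f" and a SMALLINT column "s".
    val df = spark.read
      .format("jdbc")
      .option("url", "jdbc:sqlserver://host:1433;databaseName=testdb")
      .option("dbtable", "dbo.sample_table")
      .load()
    
    // Before this fix: "f" is read back as DoubleType and "s" as IntegerType.
    // After this fix:  "f" is FloatType and "s" is ShortType.
    df.printSchema()
    ```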
    
    Refer to #28151, which contained this fix as one part of a larger PR. Following the discussion on PR #28151, it was decided to file separate PRs for each of the fixes.
    
    ## How was this patch tested?
    Unit tests added in JDBCSuite.scala and verified.
    Integration test updated and passing in MsSqlServerIntegrationSuite.scala.
    E2E test done against SQL Server.
    
    Closes #25146 from shivsood/float_short_type_fix.
    
    Authored-by: shivsood <sh...@microsoft.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala  | 12 ++++++------
 .../scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala |  7 ++++++-
 .../src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 11 +++++++++++
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala
index 82ce16c..efd7ca7 100644
--- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala
+++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala
@@ -120,24 +120,24 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite {
     assert(types.length == 12)
     assert(types(0).equals("class java.lang.Boolean"))
     assert(types(1).equals("class java.lang.Integer"))
-    assert(types(2).equals("class java.lang.Integer"))
+    assert(types(2).equals("class java.lang.Short"))
     assert(types(3).equals("class java.lang.Integer"))
     assert(types(4).equals("class java.lang.Long"))
     assert(types(5).equals("class java.lang.Double"))
-    assert(types(6).equals("class java.lang.Double"))
-    assert(types(7).equals("class java.lang.Double"))
+    assert(types(6).equals("class java.lang.Float"))
+    assert(types(7).equals("class java.lang.Float"))
     assert(types(8).equals("class java.math.BigDecimal"))
     assert(types(9).equals("class java.math.BigDecimal"))
     assert(types(10).equals("class java.math.BigDecimal"))
     assert(types(11).equals("class java.math.BigDecimal"))
     assert(row.getBoolean(0) == false)
     assert(row.getInt(1) == 255)
-    assert(row.getInt(2) == 32767)
+    assert(row.getShort(2) == 32767)
     assert(row.getInt(3) == 2147483647)
     assert(row.getLong(4) == 9223372036854775807L)
     assert(row.getDouble(5) == 1.2345678901234512E14) // float = float(53) has 15-digits precision
-    assert(row.getDouble(6) == 1.23456788103168E14)   // float(24) has 7-digits precision
-    assert(row.getDouble(7) == 1.23456788103168E14)   // real = float(24)
+    assert(row.getFloat(6) == 1.23456788103168E14)   // float(24) has 7-digits precision
+    assert(row.getFloat(7) == 1.23456788103168E14)   // real = float(24)
     assert(row.getAs[BigDecimal](8).equals(new BigDecimal("123.00")))
     assert(row.getAs[BigDecimal](9).equals(new BigDecimal("12345.12000")))
     assert(row.getAs[BigDecimal](10).equals(new BigDecimal("922337203685477.5800")))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
index 29500cf..805f73d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
@@ -30,7 +30,11 @@ private object MsSqlServerDialect extends JdbcDialect {
       // String is recommend by Microsoft SQL Server for datetimeoffset types in non-MS clients
       Option(StringType)
     } else {
-      None
+      sqlType match {
+        case java.sql.Types.SMALLINT => Some(ShortType)
+        case java.sql.Types.REAL => Some(FloatType)
+        case _ => None
+      }
     }
   }
 
@@ -39,6 +43,7 @@ private object MsSqlServerDialect extends JdbcDialect {
     case StringType => Some(JdbcType("NVARCHAR(MAX)", java.sql.Types.NVARCHAR))
     case BooleanType => Some(JdbcType("BIT", java.sql.Types.BIT))
     case BinaryType => Some(JdbcType("VARBINARY(MAX)", java.sql.Types.VARBINARY))
+    case ShortType => Some(JdbcType("SMALLINT", java.sql.Types.SMALLINT))
     case _ => None
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index a28f4e4..a47fc18 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -895,6 +895,17 @@ class JDBCSuite extends QueryTest
       "BIT")
     assert(msSqlServerDialect.getJDBCType(BinaryType).map(_.databaseTypeDefinition).get ==
       "VARBINARY(MAX)")
+    assert(msSqlServerDialect.getJDBCType(ShortType).map(_.databaseTypeDefinition).get ==
+      "SMALLINT")
+  }
+
+  test("SPARK-28152 MsSqlServerDialect catalyst type mapping") {
+    val msSqlServerDialect = JdbcDialects.get("jdbc:sqlserver")
+    val metadata = new MetadataBuilder().putLong("scale", 1)
+    assert(msSqlServerDialect.getCatalystType(java.sql.Types.SMALLINT, "SMALLINT", 1,
+      metadata).get == ShortType)
+    assert(msSqlServerDialect.getCatalystType(java.sql.Types.REAL, "REAL", 1,
+      metadata).get == FloatType)
   }
 
   test("table exists query by jdbc dialect") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org