Posted to issues@spark.apache.org by "e (JIRA)" <ji...@apache.org> on 2018/11/06 06:22:00 UTC

[jira] [Updated] (SPARK-25948) Floating-point numbers loaded by Spark are automatically rounded to integers

     [ https://issues.apache.org/jira/browse/SPARK-25948?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

e updated SPARK-25948:
----------------------
    Description: 
When Spark loads the result of an aggregate query from Redshift over JDBC, the floating-point value is automatically rounded to an integer.

{code:sql}
drop table tmp.t1;

create table tmp.t1 (
  id       varchar(16),
  quantity integer,
  amount   numeric(25,4)
);

insert into tmp.t1 values ('a1', 1, 11);
insert into tmp.t1 values ('a1', 1, 11.2);
insert into tmp.t1 values ('a1', 2, 2.74);
{code}
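
For reference, the expected aggregate over this sample data is sum(amount*quantity) = 1*11 + 1*11.2 + 2*2.74 = 11 + 11.2 + 5.48 = 27.68, so any integer result indicates lost precision.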

{code:scala}
import org.apache.spark.sql.SparkSession

val ss = SparkSession.builder()
  .appName("tmp")
  .master("local[12]")
  .getOrCreate()

// GetConf() and JdbcUrl() are site-specific helpers that return the
// connection configuration and JDBC URL for the given database.
val database = "REDSHIFT"
val dconfig = GetConf()

// Same aggregate, plus an extra sysdate column in the select list.
val sql =
  """
    |(select sum(amount*quantity) amount, sysdate load_time from tmp.t1)
  """.stripMargin

// The aggregate on its own.
val sql1 =
  """
    |(select sum(amount*quantity) amount from tmp.t1)
  """.stripMargin

val df = ss.read.format("jdbc").options(Map(
  "url"      -> JdbcUrl(database),
  "user"     -> dconfig.getString(s"$database.CONN_USER"),
  "password" -> dconfig.getString(s"$database.CONN_PASSWORD"),
  "driver"   -> dconfig.getString(s"$database.CONN_DRIVER"),
  "dbtable"  -> sql
)).load().repartition(1)

val df1 = ss.read.format("jdbc").options(Map(
  "url"      -> JdbcUrl(database),
  "user"     -> dconfig.getString(s"$database.CONN_USER"),
  "password" -> dconfig.getString(s"$database.CONN_PASSWORD"),
  "driver"   -> dconfig.getString(s"$database.CONN_DRIVER"),
  "dbtable"  -> sql1
)).load().repartition(1)

// show() prints the table itself and returns Unit, so wrapping it in
// println only adds a stray "()".
df.show(false)
df1.show(false)
{code}

Result:

{code}
+------+--------------------------+
|amount|load_time                 |
+------+--------------------------+
|28    |2018-11-06 14:12:46.184986|
+------+--------------------------+

+-------+
|amount |
+-------+
|27.6800|
+-------+
{code}

The first query returns 28 instead of the expected 27.6800, while the second returns the correct value. The only difference between the two queries is the extra sysdate column:

h5. Difference:

{code:sql}
select sum(amount*quantity) amount, sysdate load_time from tmp.t1;
select sum(amount*quantity) amount                    from tmp.t1;
{code}
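
A quick way to confirm where the scale is lost is to compare the schemas Spark infers for the two reads. The cast below is a hypothetical workaround, not a confirmed fix: it assumes the driver reports scale 0 for the computed column in the first query, and forces a non-zero decimal scale in the pushed-down subquery instead ({{sqlCast}} and {{dfCast}} are illustrative names, reusing ss/dconfig/JdbcUrl from the snippet above):

{code:scala}
// Compare what Spark inferred for each read (df/df1 from the snippet above).
// If this is a type-mapping issue, the first schema should show a decimal
// with scale 0 while the second shows decimal(25,4).
df.printSchema()
df1.printSchema()

// Hypothetical workaround: fix the result type in the subquery itself so
// the JDBC driver reports numeric(25,4) regardless of the other columns.
val sqlCast =
  """
    |(select cast(sum(amount*quantity) as numeric(25,4)) amount,
    |        sysdate load_time
    | from tmp.t1)
  """.stripMargin

val dfCast = ss.read.format("jdbc").options(Map(
  "url"      -> JdbcUrl(database),
  "user"     -> dconfig.getString(s"$database.CONN_USER"),
  "password" -> dconfig.getString(s"$database.CONN_PASSWORD"),
  "driver"   -> dconfig.getString(s"$database.CONN_DRIVER"),
  "dbtable"  -> sqlCast
)).load()

dfCast.show(false)  // amount should now keep its four decimal places
{code}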

> Floating-point numbers loaded by Spark are automatically rounded to integers
> ----------------------------------------------------------------------------
>
>                 Key: SPARK-25948
>                 URL: https://issues.apache.org/jira/browse/SPARK-25948
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.3.0
>         Environment: Redshift
>                      Java 1.8.0_192 (Oracle)
>                      Scala 2.11.12
>                      Spark 2.3.0
>            Reporter: e
>            Priority: Critical