You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "e (JIRA)" <ji...@apache.org> on 2018/11/06 06:22:00 UTC
[jira] [Updated] (SPARK-25948) Spark load floating point number is
automatically rounded to an integer
[ https://issues.apache.org/jira/browse/SPARK-25948?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
e updated SPARK-25948:
----------------------
Description:
When you use Spark to load data from Redshift and aggregate it, the floating point result is automatically rounded to an integer.
{code:sql}
drop table tmp.t1 ;
create table tmp.t1 (
id varchar(16),
quantity integer,
amount numeric(25,4)
)
;
insert into tmp.t1 values ('a1',1,11);
insert into tmp.t1 values ('a1',1,11.2);
insert into tmp.t1 values ('a1',2,2.74);
{code}
{code:scala}
val ss = SparkSession.builder()
.appName("tmp")
.master("local[12]")
.getOrCreate()
val database="REDSHIFT"
val dconfig=GetConf()
val sql=
"""
|(select sum(amount*quantity ) amount,sysdate load_time from tmp.t1)
""".stripMargin
val sql1=
"""
|(select sum(amount*quantity ) amount from tmp.t1)
""".stripMargin
val df = ss.read.format("jdbc").options(Map(
"url" -> JdbcUrl(database),
"user" -> dconfig.getString(s"$database.CONN_USER"),
"password" -> dconfig.getString(s"$database.CONN_PASSWORD"),
"driver" -> dconfig.getString(s"$database.CONN_DRIVER"),
"dbtable"-> sql
)).load().repartition(1)
val df1 = ss.read.format("jdbc").options(Map(
"url" -> JdbcUrl(database),
"user" -> dconfig.getString(s"$database.CONN_USER"),
"password" -> dconfig.getString(s"$database.CONN_PASSWORD"),
"driver" -> dconfig.getString(s"$database.CONN_DRIVER"),
"dbtable"-> sql1
)).load().repartition(1)
println(df.show(false))
println(df1.show(false))
result:
+------+--------------------------+
|amount|load_time |
+------+--------------------------+
|28 |2018-11-06 14:12:46.184986|
+------+--------------------------+
+-------+
|amount |
+-------+
|27.6800|
+-------+
{code}
h5. Difference between the two queries:
{code:sql}
select sum(amount*quantity ) amount,sysdate load_time from tmp.t1
select sum(amount*quantity ) amount from tmp.t1
{code}
was:
When you use Spark to load data from Redshift and aggregate it, the floating point result is automatically rounded to an integer.
{code:sql}
drop table tmp.t1 ;
create table tmp.t1 (
id varchar(16),
quantity integer,
amount numeric(25,4)
)
;
insert into tmp.t1 values ('a1',1,11);
insert into tmp.t1 values ('a1',1,11.2);
insert into tmp.t1 values ('a1',2,2.74);
{code}
{code:scala}
val ss = SparkSession.builder()
.appName("tmp")
.master("local[12]")
.getOrCreate()
val database="REDSHIFT"
val dconfig=GetConf()
val sql=
"""
|(select sum(amount*quantity ) amount,sysdate load_time from tmp.t1)
""".stripMargin
val df = ss.read.format("jdbc").options(Map(
"url" -> JdbcUrl(database),
"user" -> dconfig.getString(s"$database.CONN_USER"),
"password" -> dconfig.getString(s"$database.CONN_PASSWORD"),
"driver" -> dconfig.getString(s"$database.CONN_DRIVER"),
"dbtable"-> sql
)).load().repartition(1)
println(df.show(false))
result:
+------+--------------------------+
|amount|load_time |
+------+--------------------------+
|28 |2018-11-06 14:12:46.184986|
+------+--------------------------+
{code}
> Spark load floating point number is automatically rounded to an integer
> -----------------------------------------------------------------------
>
> Key: SPARK-25948
> URL: https://issues.apache.org/jira/browse/SPARK-25948
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.3.0
> Environment: redshift
> java 1.8.0_192 (oracle)
> scala 2.11.12
> spark 2.3.0
> Reporter: e
> Priority: Critical
>
> When you use Spark to load data from Redshift and aggregate it, the floating point result is automatically rounded to an integer.
> {code:sql}
> drop table tmp.t1 ;
> create table tmp.t1 (
> id varchar(16),
> quantity integer,
> amount numeric(25,4)
> )
> ;
> insert into tmp.t1 values ('a1',1,11);
> insert into tmp.t1 values ('a1',1,11.2);
> insert into tmp.t1 values ('a1',2,2.74);
> {code}
> {code:scala}
> val ss = SparkSession.builder()
> .appName("tmp")
> .master("local[12]")
> .getOrCreate()
> val database="REDSHIFT"
> val dconfig=GetConf()
> val sql=
> """
> |(select sum(amount*quantity ) amount,sysdate load_time from tmp.t1)
> """.stripMargin
> val sql1=
> """
> |(select sum(amount*quantity ) amount from tmp.t1)
> """.stripMargin
>
> val df = ss.read.format("jdbc").options(Map(
> "url" -> JdbcUrl(database),
> "user" -> dconfig.getString(s"$database.CONN_USER"),
> "password" -> dconfig.getString(s"$database.CONN_PASSWORD"),
> "driver" -> dconfig.getString(s"$database.CONN_DRIVER"),
> "dbtable"-> sql
> )).load().repartition(1)
> val df1 = ss.read.format("jdbc").options(Map(
> "url" -> JdbcUrl(database),
> "user" -> dconfig.getString(s"$database.CONN_USER"),
> "password" -> dconfig.getString(s"$database.CONN_PASSWORD"),
> "driver" -> dconfig.getString(s"$database.CONN_DRIVER"),
> "dbtable"-> sql1
> )).load().repartition(1)
> println(df.show(false))
> println(df1.show(false))
> result:
> +------+--------------------------+
> |amount|load_time |
> +------+--------------------------+
> |28 |2018-11-06 14:12:46.184986|
> +------+--------------------------+
> +-------+
> |amount |
> +-------+
> |27.6800|
> +-------+
> {code}
> h5. Difference between the two queries:
> {code:sql}
> select sum(amount*quantity ) amount,sysdate load_time from tmp.t1
> select sum(amount*quantity ) amount from tmp.t1
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org