You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Yuming Wang (Jira)" <ji...@apache.org> on 2019/09/28 05:27:00 UTC
[jira] [Comment Edited] (SPARK-29274) Can not coerce decimal type
to double type when it's join key
[ https://issues.apache.org/jira/browse/SPARK-29274?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16939862#comment-16939862 ]
Yuming Wang edited comment on SPARK-29274 at 9/28/19 5:26 AM:
--------------------------------------------------------------
SQL Server will cast string type to decimal type:
{noformat}
create table t1 (incdata_id decimal(21,0), v VARCHAR(21))
create table t2 (incdata_id VARCHAR(210), v VARCHAR(21))
insert into t1 values(100000000001636981212, '1')
insert into t2 values(100000000001636981213, '2')
1> explain select * from t1 join t2 on (t1.incdata_id = t2.incdata_id)
2> go
StmtText StmtId NodeId Parent PhysicalOp LogicalOp Argument DefinedValues EstimateRows EstimateIO EstimateCPU AvgRowSize TotalSubtreeCost OutputList Warnings Type Parallel EstimateExecutions
------------------------------------------------------------------------------------------------------------------------------------------ ----------- ----------- ----------- ------------------------------ ------------------------------ ------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------ -------------- -------------- -------------- ----------- ---------------- --------------------------------------------------------------------------------------------------------------------- -------- ---------------------------------------------------------------- -------- ------------------
select * from t1 join t2 on (t1.incdata_id = t2.incdata_id); 1 1 0 NULL NULL 1 NULL 1.0 NULL NULL NULL 2.4348916E-2 NULL NULL SELECT 0 NULL
|--Hash Match(Inner Join, HASH:([master].[dbo].[t1].[incdata_id])=([Expr1006]), RESIDUAL:([master].[dbo].[t1].[incdata_id]=[Expr1006])) 1 2 1 Hash Match Inner Join HASH:([master].[dbo].[t1].[incdata_id])=([Expr1006]), RESIDUAL:([master].[dbo].[t1].[incdata_id]=[Expr1006]) NULL 1.0 0.0 1.7779617E-2 59 2.4348916E-2 [master].[dbo].[t1].[incdata_id], [master].[dbo].[t1].[v], [master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v] NULL PLAN_ROW 0 1.0
|--Table Scan(OBJECT:([master].[dbo].[t1])) 1 3 2 Table Scan Table Scan OBJECT:([master].[dbo].[t1]) [master].[dbo].[t1].[incdata_id], [master].[dbo].[t1].[v] 1.0 0.003125 0.0001581 24 0.0032831 [master].[dbo].[t1].[incdata_id], [master].[dbo].[t1].[v] NULL PLAN_ROW 0 1.0
|--Compute Scalar(DEFINE:([Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0))) 1 4 2 Compute Scalar Compute Scalar DEFINE:([Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0)) [Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0) 1.0 0.0 0.0000001 57 3.2832001E-3 [master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v], [Expr1006] NULL PLAN_ROW 0 1.0
|--Table Scan(OBJECT:([master].[dbo].[t2])) 1 5 4 Table Scan Table Scan OBJECT:([master].[dbo].[t2]) [master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v] 1.0 0.003125 0.0001581 44 0.0032831 [master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v] NULL PLAN_ROW 0 1.0
(5 rows affected)
1> select * from t1 join t2 on (t1.incdata_id = t2.incdata_id)
2> go
incdata_id v incdata_id v
----------------------- - --------------------- ---------------------
(0 rows affected)
{noformat}
was (Author: q79969786):
SQL server will cast string type to decimal type:
{noformat}
create table t1 (incdata_id decimal(21,0), v VARCHAR(21))
create table t2 (incdata_id VARCHAR(210), v VARCHAR(21))
insert into t1 values(100000000001636981212, '1')
insert into t2 values(100000000001636981213, '2')
1> explain select * from t1 join t2 on (t1.incdata_id = t2.incdata_id)
2> go
StmtText StmtId NodeId Parent PhysicalOp LogicalOp Argument DefinedValues EstimateRows EstimateIO EstimateCPU AvgRowSize TotalSubtreeCost OutputList Warnings Type Parallel EstimateExecutions
------------------------------------------------------------------------------------------------------------------------------------------ ----------- ----------- ----------- ------------------------------ ------------------------------ ------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------ -------------- -------------- -------------- ----------- ---------------- --------------------------------------------------------------------------------------------------------------------- -------- ---------------------------------------------------------------- -------- ------------------
select * from t1 join t2 on (t1.incdata_id = t2.incdata_id); 1 1 0 NULL NULL 1 NULL 1.0 NULL NULL NULL 2.4348916E-2 NULL NULL SELECT 0 NULL
|--Hash Match(Inner Join, HASH:([master].[dbo].[t1].[incdata_id])=([Expr1006]), RESIDUAL:([master].[dbo].[t1].[incdata_id]=[Expr1006])) 1 2 1 Hash Match Inner Join HASH:([master].[dbo].[t1].[incdata_id])=([Expr1006]), RESIDUAL:([master].[dbo].[t1].[incdata_id]=[Expr1006]) NULL 1.0 0.0 1.7779617E-2 59 2.4348916E-2 [master].[dbo].[t1].[incdata_id], [master].[dbo].[t1].[v], [master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v] NULL PLAN_ROW 0 1.0
|--Table Scan(OBJECT:([master].[dbo].[t1])) 1 3 2 Table Scan Table Scan OBJECT:([master].[dbo].[t1]) [master].[dbo].[t1].[incdata_id], [master].[dbo].[t1].[v] 1.0 0.003125 0.0001581 24 0.0032831 [master].[dbo].[t1].[incdata_id], [master].[dbo].[t1].[v] NULL PLAN_ROW 0 1.0
|--Compute Scalar(DEFINE:([Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0))) 1 4 2 Compute Scalar Compute Scalar DEFINE:([Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0)) [Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0) 1.0 0.0 0.0000001 57 3.2832001E-3 [master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v], [Expr1006] NULL PLAN_ROW 0 1.0
|--Table Scan(OBJECT:([master].[dbo].[t2])) 1 5 4 Table Scan Table Scan OBJECT:([master].[dbo].[t2]) [master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v] 1.0 0.003125 0.0001581 44 0.0032831 [master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v] NULL PLAN_ROW 0 1.0
(5 rows affected)
1> select * from t1 join t2 on (t1.incdata_id = t2.incdata_id)
2> go
incdata_id v incdata_id v
----------------------- - --------------------- ---------------------
(0 rows affected)
{noformat}
> Can not coerce decimal type to double type when it's join key
> -------------------------------------------------------------
>
> Key: SPARK-29274
> URL: https://issues.apache.org/jira/browse/SPARK-29274
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.3.4, 2.4.4, 3.0.0
> Reporter: Yuming Wang
> Assignee: Pengfei Chang
> Priority: Major
> Attachments: image-2019-09-27-20-20-24-238.png
>
>
> How to reproduce this issue:
> {code:sql}
> create table t1 (incdata_id decimal(21,0), v string) using parquet;
> create table t2 (incdata_id string, v string) using parquet;
> explain select * from t1 join t2 on (t1.incdata_id = t2.incdata_id);
> == Physical Plan ==
> *(5) SortMergeJoin [knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#31 as double)))], [knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#33 as double)))], Inner
> :- *(2) Sort [knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#31 as double))) ASC NULLS FIRST], false, 0
> : +- Exchange hashpartitioning(knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#31 as double))), 200), true, [id=#104]
> : +- *(1) Filter isnotnull(incdata_id#31)
> : +- Scan hive default.t1 [incdata_id#31, v#32], HiveTableRelation `default`.`t1`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [incdata_id#31, v#32], Statistics(sizeInBytes=8.0 EiB)
> +- *(4) Sort [knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#33 as double))) ASC NULLS FIRST], false, 0
> +- Exchange hashpartitioning(knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#33 as double))), 200), true, [id=#112]
> +- *(3) Filter isnotnull(incdata_id#33)
> +- Scan hive default.t2 [incdata_id#33, v#34], HiveTableRelation `default`.`t2`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [incdata_id#33, v#34], Statistics(sizeInBytes=8.0 EiB)
> {code}
> {code:sql}
> select cast(v1 as double) as v3, cast(v2 as double) as v4,
> cast(v1 as double) = cast(v2 as double), v1 = v2
> from (select cast('100000000001636981212' as decimal(21, 0)) as v1,
> cast('100000000001636981213' as decimal(21, 0)) as v2) t;
> 1.0000000000163697E20 1.0000000000163697E20 true false
> {code}
>
> It's a realy case in our production:
> !image-2019-09-27-20-20-24-238.png|width=100%!
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org