You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "angerszhu (Jira)" <ji...@apache.org> on 2020/06/12 14:39:00 UTC
[jira] [Updated] (SPARK-31947) Solve string value error about
Date/Timestamp in ScriptTransform
[ https://issues.apache.org/jira/browse/SPARK-31947?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
angerszhu updated SPARK-31947:
------------------------------
Description:
For the following test case:
{code:java}
test("SPARK-25990: TRANSFORM should handle different data types correctly") {
assume(TestUtils.testCommandAvailable("python"))
val scriptFilePath = getTestResourcePath("test_script.py")
withTempView("v") {
val df = Seq(
(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1), Date.valueOf("2015-05-21")),
(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2), Date.valueOf("2015-05-22")),
(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3), Date.valueOf("2015-05-23"))
).toDF("a", "b", "c", "d", "e", "f") // Note column d's data type is Decimal(38, 18)
df.createTempView("v")
val query = sql(
s"""
|SELECT
|TRANSFORM(a, b, c, d, e, f)
|USING 'python $scriptFilePath' AS (a, b, c, d, e, f)
|FROM v
""".stripMargin)
val decimalToString: Column => Column = c => c.cast("string")
checkAnswer(query, identity, df.select(
'a.cast("string"),
'b.cast("string"),
'c.cast("string"),
decimalToString('d),
'e.cast("string"),
'f.cast("string")).collect())
}
}
{code}
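The test fails with a wrong result: the Timestamp and Date columns come back as raw numeric values (e.g. 1000 and 16576) instead of their formatted string representations: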
{code:java}
[info] - SPARK-25990: TRANSFORM should handle different data types correctly *** FAILED *** (4 seconds, 997 milliseconds)
[info] Results do not match for Spark plan:
[info] ScriptTransformation [a#19, b#20, c#21, d#22, e#23, f#24], python /Users/angerszhu/Documents/project/AngersZhu/spark/sql/core/target/scala-2.12/test-classes/test_script.py, [a#31, b#32, c#33, d#34, e#35, f#36], org.apache.spark.sql.execution.script.ScriptTransformIOSchema@1ad5a29c
[info] +- Project [_1#6 AS a#19, _2#7 AS b#20, _3#8 AS c#21, _4#9 AS d#22, _5#10 AS e#23, _6#11 AS f#24]
[info] +- LocalTableScan [_1#6, _2#7, _3#8, _4#9, _5#10, _6#11]
[info]
[info]
[info] == Results ==
[info] !== Expected Answer - 3 == == Actual Answer - 3 ==
[info] ![1,1,1.0,1.000000000000000000,1970-01-01 08:00:00.001,2015-05-21] [1,1,1.0,1.000000000000000000,1000,16576]
[info] ![2,2,2.0,2.000000000000000000,1970-01-01 08:00:00.002,2015-05-22] [2,2,2.0,2.000000000000000000,2000,16577]
[info] ![3,3,3.0,3.000000000000000000,1970-01-01 08:00:00.003,2015-05-23] [3,3,3.0,3.000000000000000000,3000,16578] (SparkPlanTest.scala:95)
[
{code}
> Solve string value error about Date/Timestamp in ScriptTransform
> ----------------------------------------------------------------
>
> Key: SPARK-31947
> URL: https://issues.apache.org/jira/browse/SPARK-31947
> Project: Spark
> Issue Type: Sub-task
> Components: SQL
> Affects Versions: 3.1.0
> Reporter: angerszhu
> Priority: Major
>
> For the following test case:
>
> {code:java}
> test("SPARK-25990: TRANSFORM should handle different data types correctly") {
> assume(TestUtils.testCommandAvailable("python"))
> val scriptFilePath = getTestResourcePath("test_script.py")
> withTempView("v") {
> val df = Seq(
> (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1), Date.valueOf("2015-05-21")),
> (2, "2", 2.0, BigDecimal(2.0), new Timestamp(2), Date.valueOf("2015-05-22")),
> (3, "3", 3.0, BigDecimal(3.0), new Timestamp(3), Date.valueOf("2015-05-23"))
> ).toDF("a", "b", "c", "d", "e", "f") // Note column d's data type is Decimal(38, 18)
> df.createTempView("v")
> val query = sql(
> s"""
> |SELECT
> |TRANSFORM(a, b, c, d, e, f)
> |USING 'python $scriptFilePath' AS (a, b, c, d, e, f)
> |FROM v
> """.stripMargin)
> val decimalToString: Column => Column = c => c.cast("string")
> checkAnswer(query, identity, df.select(
> 'a.cast("string"),
> 'b.cast("string"),
> 'c.cast("string"),
> decimalToString('d),
> 'e.cast("string"),
> 'f.cast("string")).collect())
> }
> }
> {code}
>
>
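> The test fails with a wrong result: the Timestamp and Date columns come back as raw numeric values (e.g. 1000 and 16576) instead of their formatted string representations: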
> {code:java}
> [info] - SPARK-25990: TRANSFORM should handle different data types correctly *** FAILED *** (4 seconds, 997 milliseconds)
> [info] Results do not match for Spark plan:
> [info] ScriptTransformation [a#19, b#20, c#21, d#22, e#23, f#24], python /Users/angerszhu/Documents/project/AngersZhu/spark/sql/core/target/scala-2.12/test-classes/test_script.py, [a#31, b#32, c#33, d#34, e#35, f#36], org.apache.spark.sql.execution.script.ScriptTransformIOSchema@1ad5a29c
> [info] +- Project [_1#6 AS a#19, _2#7 AS b#20, _3#8 AS c#21, _4#9 AS d#22, _5#10 AS e#23, _6#11 AS f#24]
> [info] +- LocalTableScan [_1#6, _2#7, _3#8, _4#9, _5#10, _6#11]
> [info]
> [info]
> [info] == Results ==
> [info] !== Expected Answer - 3 == == Actual Answer - 3 ==
> [info] ![1,1,1.0,1.000000000000000000,1970-01-01 08:00:00.001,2015-05-21] [1,1,1.0,1.000000000000000000,1000,16576]
> [info] ![2,2,2.0,2.000000000000000000,1970-01-01 08:00:00.002,2015-05-22] [2,2,2.0,2.000000000000000000,2000,16577]
> [info] ![3,3,3.0,3.000000000000000000,1970-01-01 08:00:00.003,2015-05-23] [3,3,3.0,3.000000000000000000,3000,16578] (SparkPlanTest.scala:95)
> [
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org