You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Apache Spark (Jira)" <ji...@apache.org> on 2022/05/22 03:26:00 UTC
[jira] [Assigned] (SPARK-39248) Decimal divide much slower than multiply
[ https://issues.apache.org/jira/browse/SPARK-39248?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Apache Spark reassigned SPARK-39248:
------------------------------------
Assignee: (was: Apache Spark)
> Decimal divide much slower than multiply
> ----------------------------------------
>
> Key: SPARK-39248
> URL: https://issues.apache.org/jira/browse/SPARK-39248
> Project: Spark
> Issue Type: Improvement
> Components: SQL
> Affects Versions: 3.4.0
> Reporter: Yuming Wang
> Priority: Major
>
> How to reproduce this issue:
> {code:scala}
> import org.apache.spark.benchmark.Benchmark
> val valuesPerIteration = 2880404L
> val dir = "/tmp/spark/benchmark"
> spark.range(2880404L).selectExpr("cast(id as DECIMAL(9,2)) as d").write.mode("Overwrite").parquet(dir)
> val benchmark = new Benchmark("Benchmark decimal", valuesPerIteration, minNumIters = 5)
> benchmark.addCase("d * 2 > 0") { _ =>
> spark.read.parquet(dir).where("d * 2 > 0").write.format("noop").mode("Overwrite").save()
> }
> benchmark.addCase("d / 2 > 0") { _ =>
> spark.read.parquet(dir).where("d / 2 > 0").write.format("noop").mode("Overwrite").save()
> }
> benchmark.run()
> {code}
> {noformat}
> Java HotSpot(TM) 64-Bit Server VM 1.8.0_281-b09 on Mac OS X 10.15.7
> Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
> Benchmark decimal:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
> ------------------------------------------------------------------------------------------------------------------------
> d * 2 > 0                                           435            558         151          6.6         150.9       1.0X
> d / 2 > 0                                          5569           6208         734          0.5        1933.2       0.1X
> {noformat}
> Current stack trace:
> {noformat}
> java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1203)
> java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1163)
> java.math.BigInteger.divideAndRemainderKnuth(BigInteger.java:2235)
> java.math.BigInteger.divideAndRemainder(BigInteger.java:2223)
> java.math.BigDecimal.createAndStripZerosToMatchScale(BigDecimal.java:4404)
> java.math.BigDecimal.divideAndRound(BigDecimal.java:4294)
> java.math.BigDecimal.divide(BigDecimal.java:4660)
> java.math.BigDecimal.divide(BigDecimal.java:1753)
> org.apache.spark.sql.types.Decimal.$div(Decimal.scala:505)
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
> org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
> org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
> org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.$anonfun$run$1(WriteToDataSourceV2Exec.scala:435)
> org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$$$Lambda$2997/2025304705.apply(Unknown Source)
> org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1538)
> org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:480)
> org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:381)
> org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec$$Lambda$2987/1586195133.apply(Unknown Source)
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
> org.apache.spark.scheduler.Task.run(Task.scala:139)
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
> org.apache.spark.executor.Executor$TaskRunner$$Lambda$2921/365880128.apply(Unknown Source)
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> java.lang.Thread.run(Thread.java:748)
> {noformat}
--
This message was sent by Atlassian Jira
(v8.20.7#820007)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org