You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "zhangchenglong (Jira)" <ji...@apache.org> on 2020/09/07 06:28:00 UTC
[jira] [Updated] (SPARK-32809) RDD分区数对于计算结果的影响
[ https://issues.apache.org/jira/browse/SPARK-32809?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
zhangchenglong updated SPARK-32809:
-----------------------------------
Description:
class Exec3 {
private val exec: SparkConf = new SparkConf().setMaster("local[1]").setAppName("exec3")
private val context = new SparkContext(exec)
context.setCheckpointDir("checkPoint")
/**
* get total number by key
* in this project desired results are ("apple",25) ("huwei",20)
* but in fact i get ("apple",150) ("huawei",20)
* when i change it to local[3] the result is correct
* i want to know which cause it and how to slove it
*/
@Test
def testError(): Unit ={
val rdd = context.parallelize(Seq(("apple", 10), ("apple", 15), ("huawei", 20)))
rdd.aggregateByKey(1.0)(
seqOp = (zero, price) => price * zero,
combOp = (curr, agg) => curr + agg
).collect().foreach(println(_))
context.stop()
}
}
was:
class Exec3 {
private val exec: SparkConf = new SparkConf().setMaster("local[1]").setAppName("exec3")
private val context = new SparkContext(exec)
context.setCheckpointDir("checkPoint")
/**
* get total number by key
* in this project desired results are ("苹果",25) ("华为",20)
* but in fact i get ("苹果",150) ("华为",20)
* when i change it to local[3] the result is correct
* i want to know which cause it and how to slove it
*/
@Test
def testError(): Unit ={
val rdd = context.parallelize(Seq(("苹果", 10), ("苹果", 15), ("华为", 20)))
rdd.aggregateByKey(1.0)(
seqOp = (zero, price) => price * zero,
combOp = (curr, agg) => curr + agg
).collect().foreach(println(_))
context.stop()
}
}
> RDD分区数对于计算结果的影响
> ---------------
>
> Key: SPARK-32809
> URL: https://issues.apache.org/jira/browse/SPARK-32809
> Project: Spark
> Issue Type: Bug
> Components: Spark Core
> Affects Versions: 2.2.0
> Reporter: zhangchenglong
> Priority: Major
>
> class Exec3 {
> private val exec: SparkConf = new SparkConf().setMaster("local[1]").setAppName("exec3")
> private val context = new SparkContext(exec)
> context.setCheckpointDir("checkPoint")
>
> /**
> * get total number by key
> * in this project desired results are ("apple",25) ("huwei",20)
> * but in fact i get ("apple",150) ("huawei",20)
> * when i change it to local[3] the result is correct
> * i want to know which cause it and how to slove it
> */
> @Test
> def testError(): Unit ={
> val rdd = context.parallelize(Seq(("apple", 10), ("apple", 15), ("huawei", 20)))
> rdd.aggregateByKey(1.0)(
> seqOp = (zero, price) => price * zero,
> combOp = (curr, agg) => curr + agg
> ).collect().foreach(println(_))
> context.stop()
> }
> }
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org