You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "zhangchenglong (Jira)" <ji...@apache.org> on 2020/09/07 06:28:00 UTC

[jira] [Updated] (SPARK-32809) RDD分区数对于计算结果的影响

     [ https://issues.apache.org/jira/browse/SPARK-32809?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

zhangchenglong updated SPARK-32809:
-----------------------------------
    Description: 
class Exec3 {

private val exec: SparkConf = new SparkConf().setMaster("local[1]").setAppName("exec3")
 private val context = new SparkContext(exec)
 context.setCheckpointDir("checkPoint")
 
 /**
 * get total number by key 
 * in this project the desired results are ("apple",25) ("huawei",20)
 * but in fact I get ("apple",150) ("huawei",20)
 *   when I change it to local[3] the result is correct
 *  I want to know what causes it and how to solve it
 */
 @Test
 def testError(): Unit ={
 val rdd = context.parallelize(Seq(("apple", 10), ("apple", 15), ("huawei", 20)))
 rdd.aggregateByKey(1.0)(
 seqOp = (zero, price) => price * zero,
 combOp = (curr, agg) => curr + agg
 ).collect().foreach(println(_))
 context.stop()
 }
 }

  was:
class Exec3 {

private val exec: SparkConf = new SparkConf().setMaster("local[1]").setAppName("exec3")
 private val context = new SparkContext(exec)
 context.setCheckpointDir("checkPoint")
 
 /**
 * get total number by key 
 * in this project desired results are ("苹果",25) ("华为",20)
 * but in fact i get ("苹果",150) ("华为",20)
 *   when i change it to local[3] the result is correct
*  i want to know   which cause it and how to slove it 
 */
 @Test
 def testError(): Unit ={
 val rdd = context.parallelize(Seq(("苹果", 10), ("苹果", 15), ("华为", 20)))
 rdd.aggregateByKey(1.0)(
 seqOp = (zero, price) => price * zero,
 combOp = (curr, agg) => curr + agg
 ).collect().foreach(println(_))
 context.stop()
 }
 }


> RDD分区数对于计算结果的影响
> ---------------
>
>                 Key: SPARK-32809
>                 URL: https://issues.apache.org/jira/browse/SPARK-32809
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>    Affects Versions: 2.2.0
>            Reporter: zhangchenglong
>            Priority: Major
>
> class Exec3 {
> private val exec: SparkConf = new SparkConf().setMaster("local[1]").setAppName("exec3")
>  private val context = new SparkContext(exec)
>  context.setCheckpointDir("checkPoint")
>  
>  /**
>  * get total number by key 
>  * in this project the desired results are ("apple",25) ("huawei",20)
>  * but in fact I get ("apple",150) ("huawei",20)
>  *   when I change it to local[3] the result is correct
>  *  I want to know what causes it and how to solve it
>  */
>  @Test
>  def testError(): Unit ={
>  val rdd = context.parallelize(Seq(("apple", 10), ("apple", 15), ("huawei", 20)))
>  rdd.aggregateByKey(1.0)(
>  seqOp = (zero, price) => price * zero,
>  combOp = (curr, agg) => curr + agg
>  ).collect().foreach(println(_))
>  context.stop()
>  }
>  }



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org