You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "HongJin (Jira)" <ji...@apache.org> on 2020/03/27 06:21:00 UTC
[jira] [Updated] (SPARK-31281) Hit OOM Error - GC Limit
[ https://issues.apache.org/jira/browse/SPARK-31281?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
HongJin updated SPARK-31281:
----------------------------
Description:
MemoryStore is 2.6 GB.
conf = new SparkConf().setAppName("test")
//.set("spark.sql.codegen.wholeStage", "false")
.set("spark.driver.host", "localhost")
.set("spark.driver.memory", "4g")
.set("spark.executor.cores","1")
.set("spark.num.executors","1")
.set("spark.executor.memory", "4g")
.set("spark.executor.memoryOverhead", "400m")
.set("spark.dynamicAllocation.enabled", "true")
.set("spark.dynamicAllocation.minExecutors","1")
.set("spark.dynamicAllocation.maxExecutors","2")
.set("spark.ui.enabled","true") //enable spark UI
.set("spark.sql.shuffle.partitions",defaultPartitions)
.setMaster("local[2]")
sparkSession = SparkSession.builder.config(conf).getOrCreate()
val df = SparkFactory.sparkSession.sqlContext
.read
.option("header", "true")
.option("delimiter", delimiter)
.csv(textFileLocation)
joinedDf = upperCaseLeft.as("l")
.join(upperCaseRight.as("r"), caseTransformedKeys, "full_outer")
.select(compositeKeysCol ::: nonKeyCols.map(col => mapHelper(col,toleranceValue,caseSensitive)): _*)
data = joinedDf.take(maxRecords)
was:
conf = new SparkConf().setAppName("test")
//.set("spark.sql.codegen.wholeStage", "false")
.set("spark.driver.host", "localhost")
.set("spark.driver.memory", "4g")
.set("spark.executor.cores","1")
.set("spark.num.executors","1")
.set("spark.executor.memory", "4g")
.set("spark.executor.memoryOverhead", "400m")
.set("spark.dynamicAllocation.enabled", "true")
.set("spark.dynamicAllocation.minExecutors","1")
.set("spark.dynamicAllocation.maxExecutors","2")
.set("spark.ui.enabled","true") //enable spark UI
.set("spark.sql.shuffle.partitions",defaultPartitions)
.setMaster(numCores)
sparkSession = SparkSession.builder.config(conf).getOrCreate()
val df = SparkFactory.sparkSession.sqlContext
.read
.option("header", "true")
.option("delimiter", delimiter)
.csv(textFileLocation)
joinedDf = upperCaseLeft.as("l")
.join(upperCaseRight.as("r"), caseTransformedKeys, "full_outer")
.select(compositeKeysCol ::: nonKeyCols.map(col => mapHelper(col,toleranceValue,caseSensitive)): _*)
data = joinedDf.take(maxRecords)
> Hit OOM Error - GC Limit
> ------------------------
>
> Key: SPARK-31281
> URL: https://issues.apache.org/jira/browse/SPARK-31281
> Project: Spark
> Issue Type: Question
> Components: Java API
> Affects Versions: 2.4.4
> Reporter: HongJin
> Priority: Critical
>
> MemoryStore is 2.6 GB.
> conf = new SparkConf().setAppName("test")
> //.set("spark.sql.codegen.wholeStage", "false")
> .set("spark.driver.host", "localhost")
> .set("spark.driver.memory", "4g")
> .set("spark.executor.cores","1")
> .set("spark.num.executors","1")
> .set("spark.executor.memory", "4g")
> .set("spark.executor.memoryOverhead", "400m")
> .set("spark.dynamicAllocation.enabled", "true")
> .set("spark.dynamicAllocation.minExecutors","1")
> .set("spark.dynamicAllocation.maxExecutors","2")
> .set("spark.ui.enabled","true") //enable spark UI
> .set("spark.sql.shuffle.partitions",defaultPartitions)
> .setMaster("local[2]")
> sparkSession = SparkSession.builder.config(conf).getOrCreate()
>
> val df = SparkFactory.sparkSession.sqlContext
> .read
> .option("header", "true")
> .option("delimiter", delimiter)
> .csv(textFileLocation)
>
> joinedDf = upperCaseLeft.as("l")
> .join(upperCaseRight.as("r"), caseTransformedKeys, "full_outer")
> .select(compositeKeysCol ::: nonKeyCols.map(col => mapHelper(col,toleranceValue,caseSensitive)): _*)
>
> data = joinedDf.take(maxRecords)
>
>
>
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org