You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "pin_zhang (JIRA)" <ji...@apache.org> on 2016/09/05 01:35:20 UTC
[jira] [Created] (SPARK-17395) Queries on CSV partition table
result in frequent GC
pin_zhang created SPARK-17395:
---------------------------------
Summary: Queries on CSV partition table result in frequent GC
Key: SPARK-17395
URL: https://issues.apache.org/jira/browse/SPARK-17395
Project: Spark
Issue Type: Bug
Components: SQL
Affects Versions: 2.0.0, 1.6.2, 1.5.2
Reporter: pin_zhang
1. Create external partitioned table and run sqls against the table
2. Run the queries for a while; the driver JVM performs frequent GC.
Increasing the heap size does not resolve this issue.
3. Test code
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2
/**
 * Reproduction for SPARK-17395: repeated COUNT(*) queries against an
 * external, partitioned CSV-SerDe table cause frequent GC in the driver JVM.
 *
 * Creates the table, registers 16 partitions, then starts 6 threads that
 * each run the same aggregate query in an infinite loop (intentional — the
 * program never exits; GC behavior is observed while it runs).
 */
object Bugs {
  def main(args: Array[String]): Unit = {
    val location = "file:///g:/home/test/csv"
    // DDL strings are kept verbatim — they are the exact repro input.
    val create = s"""CREATE EXTERNAL TABLE test_csv
         (ID string, SEQ string )
          PARTITIONED BY(index int)
          ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
          LOCATION "${location}"
       """
    val add_part = s"""
         ALTER TABLE test_csv ADD
         PARTITION (index=1)LOCATION '${location}/index=1'
         PARTITION (index=2)LOCATION '${location}/index=2'
         PARTITION (index=3)LOCATION '${location}/index=3'
         PARTITION (index=4)LOCATION '${location}/index=4'
         PARTITION (index=5)LOCATION '${location}/index=5'
         PARTITION (index=6)LOCATION '${location}/index=6'
         PARTITION (index=7)LOCATION '${location}/index=7'
         PARTITION (index=8)LOCATION '${location}/index=8'
         PARTITION (index=9)LOCATION '${location}/index=9'
         PARTITION (index=10)LOCATION '${location}/index=10'
         PARTITION (index=11)LOCATION '${location}/index=11'
         PARTITION (index=12)LOCATION '${location}/index=12'
         PARTITION (index=13)LOCATION '${location}/index=13'
         PARTITION (index=14)LOCATION '${location}/index=14'
         PARTITION (index=15)LOCATION '${location}/index=15'
         PARTITION (index=16)LOCATION '${location}/index=16'
       """

    val conf = new SparkConf().setAppName("scala").setMaster("local[2]")
    val ctx = new SparkContext(conf)
    val hctx = new HiveContext(ctx)
    hctx.sql(create)
    hctx.sql(add_part)

    // Six concurrent query threads are enough to make the GC churn visible.
    for (_ <- 1 to 6) {
      new Query(hctx).start()
    }
  }

  /**
   * Worker thread that repeatedly runs a COUNT(*) over the partitioned
   * table with a short pause between iterations. Loops forever by design.
   *
   * @param hctx shared HiveContext used to issue the query
   */
  class Query(hctx: HiveContext) extends Thread {
    setName("Query-Thread")

    // Thread.run() is a side-effecting nullary Java method: override it with
    // explicit parentheses and an explicit Unit result type (the original
    // `override def run =` relied on deprecated arity adaptation).
    override def run(): Unit = {
      while (true) {
        hctx.sql("select count(*) from test_csv").show()
        Thread.sleep(100)
      }
    }
  }
}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org