Posted to dev@kylin.apache.org by Alone <23...@qq.com> on 2018/07/06 10:29:03 UTC

Build Cube with Spark, OutOfMemoryError

Hello. When I built a cube with Spark, an OutOfMemoryError occurred. The details are:

18/07/06 18:10:59 INFO TaskSetManager: Finished task 57.0 in stage 1.0 (TID 212) in 41733 ms on bigdata07 (executor 1) (58/58)
18/07/06 18:10:59 INFO YarnScheduler: Removed TaskSet 1.0, whose tasks have all completed, from pool 
18/07/06 18:10:59 INFO DAGScheduler: ResultStage 1 (saveAsNewAPIHadoopDataset at SparkCubingByLayer.java:277) finished in 685.108 s
18/07/06 18:10:59 INFO DAGScheduler: Job 0 finished: saveAsNewAPIHadoopDataset at SparkCubingByLayer.java:277, took 1860.288558 s
18/07/06 18:11:03 INFO SparkCubingByLayer: Persisting RDD for level 0 into hdfs://bigdata01:8020/kylin/kylin_metadata/kylin-e51b3990-2800-4f9d-940e-8cc111a7523c/test03/cuboid/level_base_cuboid
18/07/06 18:11:03 INFO DictionaryManager: Dict with resource path /dict/GZHG_DATA.PMS_PASSENGER_MANIFEST_BODY_NEW/CERT_NO/e71dd8c5-767d-4405-b502-9ab50fb1ff20.dict is removed due to COLLECTED
18/07/06 18:11:03 INFO DictionaryManager: DictionaryManager(452625009) loading DictionaryInfo(loadDictObj:true) at /dict/GZHG_DATA.PMS_PASSENGER_MANIFEST_BODY_NEW/FLIGHT_ID/10f549ac-e8b9-44a4-8d17-3a6b604636ae.dict
18/07/06 18:11:03 INFO DictionaryManager: Dict with resource path /dict/GZHG_DATA.PMS_PASSENGER_MANIFEST_BODY_NEW/FROM_PLACE/64315189-7f3c-46d1-8488-5d3557436ed9.dict is removed due to COLLECTED
18/07/06 18:11:03 INFO DictionaryManager: DictionaryManager(452625009) loading DictionaryInfo(loadDictObj:true) at /dict/GZHG_DATA.PMS_PASSENGER_MANIFEST_BODY_NEW/FLIGHT_DATE/99e24ce4-f801-471e-af6b-89c7a483c9d8.dict
18/07/06 18:11:03 INFO DictionaryManager: DictionaryManager(452625009) loading DictionaryInfo(loadDictObj:true) at /dict/GZHG_DATA.PMS_PASSENGER_MANIFEST_BODY_NEW/CERT_NO/e71dd8c5-767d-4405-b502-9ab50fb1ff20.dict
Exception in thread "main" com.google.common.util.concurrent.ExecutionError: java.lang.OutOfMemoryError: Java heap space
	at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2261)
	at com.google.common.cache.LocalCache.get(LocalCache.java:4000)
	at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:4004)
	at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
	at org.apache.kylin.dict.DictionaryManager.getDictionaryInfo(DictionaryManager.java:104)
	at org.apache.kylin.cube.CubeManager$DictionaryAssist.getDictionary(CubeManager.java:1109)
	at org.apache.kylin.cube.CubeManager.getDictionary(CubeManager.java:1042)
	at org.apache.kylin.cube.CubeSegment.getDictionary(CubeSegment.java:324)
	at org.apache.kylin.cube.kv.CubeDimEncMap.getDictionary(CubeDimEncMap.java:86)
	at org.apache.kylin.cube.kv.CubeDimEncMap.get(CubeDimEncMap.java:65)
	at org.apache.kylin.engine.mr.common.CubeStatsReader.getCuboidSizeMapFromRowCount(CubeStatsReader.java:196)
	at org.apache.kylin.engine.mr.common.CubeStatsReader.getCuboidSizeMap(CubeStatsReader.java:161)
	at org.apache.kylin.engine.mr.common.CubeStatsReader.estimateLayerSize(CubeStatsReader.java:278)
	at org.apache.kylin.engine.spark.SparkCubingByLayer.estimateRDDPartitionNum(SparkCubingByLayer.java:254)
	at org.apache.kylin.engine.spark.SparkCubingByLayer.execute(SparkCubingByLayer.java:234)
	at org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:37)
	at org.apache.kylin.common.util.SparkEntry.main(SparkEntry.java:44)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:744)
	at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
	at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.OutOfMemoryError: Java heap space
	at org.apache.kylin.dict.TrieDictionary.readFields(TrieDictionary.java:335)
	at org.apache.kylin.dict.TrieDictionaryForest.readFields(TrieDictionaryForest.java:234)
	at org.apache.kylin.dict.DictionaryInfoSerializer.deserialize(DictionaryInfoSerializer.java:74)
	at org.apache.kylin.dict.DictionaryInfoSerializer.deserialize(DictionaryInfoSerializer.java:34)
	at org.apache.kylin.common.persistence.ResourceStore.getResource(ResourceStore.java:181)
	at org.apache.kylin.dict.DictionaryManager.load(DictionaryManager.java:404)
	at org.apache.kylin.dict.DictionaryManager$1.load(DictionaryManager.java:83)
	at org.apache.kylin.dict.DictionaryManager$1.load(DictionaryManager.java:80)
	at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
	at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
	at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
	at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2257)
	at com.google.common.cache.LocalCache.get(LocalCache.java:4000)
	at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:4004)
	at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
	at org.apache.kylin.dict.DictionaryManager.getDictionaryInfo(DictionaryManager.java:104)
	at org.apache.kylin.cube.CubeManager$DictionaryAssist.getDictionary(CubeManager.java:1109)
	at org.apache.kylin.cube.CubeManager.getDictionary(CubeManager.java:1042)
	at org.apache.kylin.cube.CubeSegment.getDictionary(CubeSegment.java:324)
	at org.apache.kylin.cube.kv.CubeDimEncMap.getDictionary(CubeDimEncMap.java:86)
	at org.apache.kylin.cube.kv.CubeDimEncMap.get(CubeDimEncMap.java:65)
	at org.apache.kylin.engine.mr.common.CubeStatsReader.getCuboidSizeMapFromRowCount(CubeStatsReader.java:196)
	at org.apache.kylin.engine.mr.common.CubeStatsReader.getCuboidSizeMap(CubeStatsReader.java:161)
	at org.apache.kylin.engine.mr.common.CubeStatsReader.estimateLayerSize(CubeStatsReader.java:278)
	at org.apache.kylin.engine.spark.SparkCubingByLayer.estimateRDDPartitionNum(SparkCubingByLayer.java:254)
	at org.apache.kylin.engine.spark.SparkCubingByLayer.execute(SparkCubingByLayer.java:234)
	at org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:37)
	at org.apache.kylin.common.util.SparkEntry.main(SparkEntry.java:44)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
18/07/06 18:11:04 INFO SparkContext: Invoking stop() from shutdown hook
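
From the stack trace, the OutOfMemoryError is thrown in the Spark driver while it deserializes the column dictionaries inside SparkCubingByLayer.estimateRDDPartitionNum, so I suspect the driver heap is too small to hold these dictionaries. If I read the Kylin docs correctly, the memory for the Spark cubing job can be overridden through the kylin.engine.spark-conf.* properties in kylin.properties; the lines below are only a sketch of what I am planning to try, and the values are guesses for my cluster, not recommendations:

    # kylin.properties - Spark engine overrides (values are my guesses)
    kylin.engine.spark-conf.spark.driver.memory=4G
    kylin.engine.spark-conf.spark.executor.memory=4G

Is increasing the driver memory the right knob here, or is there a better way to avoid loading all the dictionaries on the driver during partition estimation?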