Posted to issues@hive.apache.org by "Lê Văn Thanh (JIRA)" <ji...@apache.org> on 2019/01/04 08:43:00 UTC

[jira] [Updated] (HIVE-21084) gc overhead limit exceeded

     [ https://issues.apache.org/jira/browse/HIVE-21084?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Lê Văn Thanh updated HIVE-21084:
--------------------------------
    Description: 
Hello,

I have a table with about 10 GB of data and only 4 GB of free RAM. I tried to select the data from this table into another table stored in ORC format, but the query failed with a memory error (I am running the SELECT query from the console).
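
For reference, a minimal sketch of the kind of statement described above (table and column names are hypothetical, not taken from the issue; the stack trace below shows a unix_timestamp() conversion of a string column followed by numeric arithmetic):

 -- Hypothetical reconstruction for illustration only.
 CREATE TABLE events_orc STORED AS ORC AS
 SELECT id,
        unix_timestamp(event_time) * 1000 AS event_ts_ms
 FROM events_text;

An INSERT ... SELECT into an existing ORC table is an equivalent way to run the same operation.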

Details of the error:




Caused by: java.util.concurrent.ExecutionException: Exception thrown by job
 at org.apache.spark.JavaFutureActionWrapper.getImpl(FutureAction.scala:272)
 at org.apache.spark.JavaFutureActionWrapper.get(FutureAction.scala:277)
 at org.apache.hadoop.hive.ql.exec.spark.status.impl.LocalSparkJobStatus.getError(LocalSparkJobStatus.java:171)
 at org.apache.hadoop.hive.ql.exec.spark.SparkTask.getSparkJobInfo(SparkTask.java:369)
 at org.apache.hadoop.hive.ql.exec.spark.SparkTask.execute(SparkTask.java:118)
 at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199)
 at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
 at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2182)
 at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1838)
 at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1525)
 at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1236)
 at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1231)
 at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:255)
 ... 11 more
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 0.0 failed 1 times, most recent failure: Lost task 6.0 in stage 0.0 (TID 7, localhost, executor driver): java.lang.OutOfMemoryError: GC overhead limit exceeded
 at java.util.Arrays.copyOfRange(Arrays.java:3664)
 at java.lang.String.<init>(String.java:207)
 at java.nio.HeapCharBuffer.toString(HeapCharBuffer.java:567)
 at java.nio.CharBuffer.toString(CharBuffer.java:1241)
 at org.apache.hadoop.io.Text.decode(Text.java:412)
 at org.apache.hadoop.io.Text.decode(Text.java:389)
 at org.apache.hadoop.io.Text.toString(Text.java:280)
 at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46)
 at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getString(PrimitiveObjectInspectorUtils.java:891)
 at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter$StringConverter.convert(PrimitiveObjectInspectorConverter.java:508)
 at org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp.evaluate(GenericUDFToUnixTimeStamp.java:127)
 at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
 at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
 at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
 at org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric.evaluate(GenericUDFBaseNumeric.java:128)
 at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
 at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
 at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
 at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
 at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
 at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
 at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
 at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
 at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:136)
 at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48)
 at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27)
 at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85)
 at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42)
 at scala.collection.Iterator$class.foreach(Iterator.scala:893)
 at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
 at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
 at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)

Driver stacktrace:
 at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
 at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
 at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
 at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
 at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
 at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
 at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
 at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
 at scala.Option.foreach(Option.scala:257)
 at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
 at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
 at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
 at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
 at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
 at java.util.Arrays.copyOfRange(Arrays.java:3664)
 at java.lang.String.<init>(String.java:207)
 at java.nio.HeapCharBuffer.toString(HeapCharBuffer.java:567)
 at java.nio.CharBuffer.toString(CharBuffer.java:1241)
 at org.apache.hadoop.io.Text.decode(Text.java:412)
 at org.apache.hadoop.io.Text.decode(Text.java:389)
 at org.apache.hadoop.io.Text.toString(Text.java:280)
 at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46)
 at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getString(PrimitiveObjectInspectorUtils.java:891)
 at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter$StringConverter.convert(PrimitiveObjectInspectorConverter.java:508)
 at org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp.evaluate(GenericUDFToUnixTimeStamp.java:127)
 at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
 at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
 at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
 at org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric.evaluate(GenericUDFBaseNumeric.java:128)
 at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
 at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
 at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
 at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
 at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
 at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
 at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
 at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
 at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:136)
 at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48)
 at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27)
 at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85)
 at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42)
 at scala.collection.Iterator$class.foreach(Iterator.scala:893)
 at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
 at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
 at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)





How do I set up Apache Spark to use the local hard disk when the data does not fit in RAM in local mode?
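
As a hedged illustration of the kind of settings this question refers to (not a confirmed fix; the values are examples only, and for Hive on Spark most spark.* properties only take effect if set before the Spark session for the query is created):

 -- Scratch directory Spark uses for shuffle/spill files on the local disk.
 set spark.local.dir=/path/on/large/disk;
 -- Executor heap. In local mode everything runs in one JVM, so its heap is effectively fixed
 -- when that process starts; the "GC overhead limit exceeded" error is a heap problem, and
 -- spilling to disk alone does not reduce per-task heap usage.
 set spark.executor.memory=4g;
 -- Hive side: smaller input splits produce more, smaller map tasks.
 set mapreduce.input.fileinputformat.split.maxsize=67108864;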

  was:
Hello,

I have a table with about 10 GB of data and only 4 GB of free RAM. I tried to select the data from this table into another table stored in ORC format, but the query failed with a memory error (I am running the SELECT query from the console).

How do I set up Apache Spark to use the local hard disk when the data does not fit in RAM in local mode?


> gc overhead limit exceeded 
> ---------------------------
>
>                 Key: HIVE-21084
>                 URL: https://issues.apache.org/jira/browse/HIVE-21084
>             Project: Hive
>          Issue Type: Bug
>         Environment: Ubuntu 16.04
> Hive 2.3.0
> Spark 2.0.0
>            Reporter: Lê Văn Thanh
>            Priority: Critical
>
> Hello,
> I have a table with about 10 GB of data and only 4 GB of free RAM. I tried to select the data from this table into another table stored in ORC format, but the query failed with a memory error (I am running the SELECT query from the console).
> How do I set up Apache Spark to use the local hard disk when the data does not fit in RAM in local mode?



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)