You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "Lê Văn Thanh (JIRA)" <ji...@apache.org> on 2019/01/04 08:43:00 UTC
[jira] [Updated] (HIVE-21084) gc overhead limit exceeded
[ https://issues.apache.org/jira/browse/HIVE-21084?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Lê Văn Thanh updated HIVE-21084:
--------------------------------
Description:
Hello ,
I have a table with 10GB data and 4GB free RAM , I tried to select data from this table to other table with ORC format but I got an error about memory limit ( I'm running the query SELECT in console ) .
Detail bug :
Caused by: java.util.concurrent.ExecutionException: Exception thrown by job
at org.apache.spark.JavaFutureActionWrapper.getImpl(FutureAction.scala:272)
at org.apache.spark.JavaFutureActionWrapper.get(FutureAction.scala:277)
at org.apache.hadoop.hive.ql.exec.spark.status.impl.LocalSparkJobStatus.getError(LocalSparkJobStatus.java:171)
at org.apache.hadoop.hive.ql.exec.spark.SparkTask.getSparkJobInfo(SparkTask.java:369)
at org.apache.hadoop.hive.ql.exec.spark.SparkTask.execute(SparkTask.java:118)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2182)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1838)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1525)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1236)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1231)
at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:255)
... 11 more
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 0.0 failed 1 times, most recent failure: Lost task 6.0 in stage 0.0 (TID 7, localhost, executor driver): java.lang.OutOfMemoryError: GC overhead limit exceeded
at java.util.Arrays.copyOfRange(Arrays.java:3664)
at java.lang.String.<init>(String.java:207)
at java.nio.HeapCharBuffer.toString(HeapCharBuffer.java:567)
at java.nio.CharBuffer.toString(CharBuffer.java:1241)
at org.apache.hadoop.io.Text.decode(Text.java:412)
at org.apache.hadoop.io.Text.decode(Text.java:389)
at org.apache.hadoop.io.Text.toString(Text.java:280)
at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46)
at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getString(PrimitiveObjectInspectorUtils.java:891)
at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter$StringConverter.convert(PrimitiveObjectInspectorConverter.java:508)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp.evaluate(GenericUDFToUnixTimeStamp.java:127)
at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric.evaluate(GenericUDFBaseNumeric.java:128)
at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:136)
at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48)
at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27)
at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85)
at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
at java.util.Arrays.copyOfRange(Arrays.java:3664)
at java.lang.String.<init>(String.java:207)
at java.nio.HeapCharBuffer.toString(HeapCharBuffer.java:567)
at java.nio.CharBuffer.toString(CharBuffer.java:1241)
at org.apache.hadoop.io.Text.decode(Text.java:412)
at org.apache.hadoop.io.Text.decode(Text.java:389)
at org.apache.hadoop.io.Text.toString(Text.java:280)
at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46)
at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getString(PrimitiveObjectInspectorUtils.java:891)
at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter$StringConverter.convert(PrimitiveObjectInspectorConverter.java:508)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp.evaluate(GenericUDFToUnixTimeStamp.java:127)
at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric.evaluate(GenericUDFBaseNumeric.java:128)
at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:136)
at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48)
at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27)
at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85)
at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
How to setup Apache Spark to use local hard disk when data does not fit in RAM in local mode?
was:
Hello ,
I have a table with 10GB data and 4GB free RAM , I tried to select data from this table to other table with ORC format but I got an error about memory limit ( I'm running the query SELECT in console ) .
How to setup Apache Spark to use local hard disk when data does not fit in RAM in local mode?
> gc overhead limit exceeded
> ---------------------------
>
> Key: HIVE-21084
> URL: https://issues.apache.org/jira/browse/HIVE-21084
> Project: Hive
> Issue Type: Bug
> Environment: Ubuntu 16.04
> Hive 2.3.0
> Spark 2.0.0
> Reporter: Lê Văn Thanh
> Priority: Critical
>
> Hello ,
> I have a table with 10GB data and 4GB free RAM , I tried to select data from this table to other table with ORC format but I got an error about memory limit ( I'm running the query SELECT in console ) .
> Detail bug :
> Caused by: java.util.concurrent.ExecutionException: Exception thrown by job
> at org.apache.spark.JavaFutureActionWrapper.getImpl(FutureAction.scala:272)
> at org.apache.spark.JavaFutureActionWrapper.get(FutureAction.scala:277)
> at org.apache.hadoop.hive.ql.exec.spark.status.impl.LocalSparkJobStatus.getError(LocalSparkJobStatus.java:171)
> at org.apache.hadoop.hive.ql.exec.spark.SparkTask.getSparkJobInfo(SparkTask.java:369)
> at org.apache.hadoop.hive.ql.exec.spark.SparkTask.execute(SparkTask.java:118)
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199)
> at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
> at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2182)
> at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1838)
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1525)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1236)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1231)
> at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:255)
> ... 11 more
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 0.0 failed 1 times, most recent failure: Lost task 6.0 in stage 0.0 (TID 7, localhost, executor driver): java.lang.OutOfMemoryError: GC overhead limit exceeded
> at java.util.Arrays.copyOfRange(Arrays.java:3664)
> at java.lang.String.<init>(String.java:207)
> at java.nio.HeapCharBuffer.toString(HeapCharBuffer.java:567)
> at java.nio.CharBuffer.toString(CharBuffer.java:1241)
> at org.apache.hadoop.io.Text.decode(Text.java:412)
> at org.apache.hadoop.io.Text.decode(Text.java:389)
> at org.apache.hadoop.io.Text.toString(Text.java:280)
> at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46)
> at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getString(PrimitiveObjectInspectorUtils.java:891)
> at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter$StringConverter.convert(PrimitiveObjectInspectorConverter.java:508)
> at org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp.evaluate(GenericUDFToUnixTimeStamp.java:127)
> at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
> at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
> at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
> at org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric.evaluate(GenericUDFBaseNumeric.java:128)
> at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
> at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
> at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
> at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
> at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
> at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
> at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
> at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
> at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:136)
> at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48)
> at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27)
> at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85)
> at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42)
> at scala.collection.Iterator$class.foreach(Iterator.scala:893)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
> at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
> at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
> Driver stacktrace:
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
> at scala.Option.foreach(Option.scala:257)
> at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
> at java.util.Arrays.copyOfRange(Arrays.java:3664)
> at java.lang.String.<init>(String.java:207)
> at java.nio.HeapCharBuffer.toString(HeapCharBuffer.java:567)
> at java.nio.CharBuffer.toString(CharBuffer.java:1241)
> at org.apache.hadoop.io.Text.decode(Text.java:412)
> at org.apache.hadoop.io.Text.decode(Text.java:389)
> at org.apache.hadoop.io.Text.toString(Text.java:280)
> at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46)
> at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getString(PrimitiveObjectInspectorUtils.java:891)
> at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter$StringConverter.convert(PrimitiveObjectInspectorConverter.java:508)
> at org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp.evaluate(GenericUDFToUnixTimeStamp.java:127)
> at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
> at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
> at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
> at org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric.evaluate(GenericUDFBaseNumeric.java:128)
> at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
> at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
> at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
> at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
> at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
> at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
> at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
> at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
> at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:136)
> at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48)
> at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27)
> at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85)
> at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42)
> at scala.collection.Iterator$class.foreach(Iterator.scala:893)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
> at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
> at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
> How to setup Apache Spark to use local hard disk when data does not fit in RAM in local mode?
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)