You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@mahout.apache.org by "Suneel Marthi (JIRA)" <ji...@apache.org> on 2016/04/08 10:46:25 UTC
[jira] [Commented] (MAHOUT-1818) dals test failing in
Flink-bindings
[ https://issues.apache.org/jira/browse/MAHOUT-1818?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15231885#comment-15231885 ]
Suneel Marthi commented on MAHOUT-1818:
---------------------------------------
We don't see the OOM anymore but the below error also from FlinkOpAtA.slim()
{Code}
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(1/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(2/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(3/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(4/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(5/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(6/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(7/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(8/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(1/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(2/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(3/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(4/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(5/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(6/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(7/8) switched to CANCELED
04/08/2016 03:52:13 CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(8/8) switched to CANCELED
04/08/2016 03:52:13 Reduce (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(1/1) switched to CANCELED
04/08/2016 03:52:13 DataSink (org.apache.flink.api.java.Utils$CollectHelper@2c663246)(1/1) switched to CANCELED
04/08/2016 03:52:13 Job execution switched to status FAILED.
- dals *** FAILED ***
org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply$mcV$sp(JobManager.scala:716)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:662)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:662)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:41)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:401)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.pollAndExecAll(ForkJoinPool.java:1253)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1346)
...
Cause: java.lang.IllegalStateException: unread block data
at java.io.ObjectInputStream$BlockDataInputStream.setBlockDataMode(ObjectInputStream.java:2431)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1383)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
at org.apache.flink.util.InstantiationUtil.deserializeObject(InstantiationUtil.java:290)
at org.apache.flink.util.InstantiationUtil.readObjectFromConfig(InstantiationUtil.java:248)
at org.apache.flink.runtime.operators.util.TaskConfig.getStubWrapper(TaskConfig.java:282)
...
{Code}
> dals test failing in Flink-bindings
> -----------------------------------
>
> Key: MAHOUT-1818
> URL: https://issues.apache.org/jira/browse/MAHOUT-1818
> Project: Mahout
> Issue Type: Bug
> Components: Flink
> Affects Versions: 0.11.2
> Reporter: Andrew Palumbo
> Assignee: Andrew Palumbo
> Priority: Blocker
> Fix For: 0.12.0
>
>
> {{dals}} test fails in Flink bindings with an OOM. Numerically the test passes, when the matrix being decomposed in the test lowered to the size 50 x 50. But the default size of the matrix in the {{DistributedDecompositionsSuiteBase}} is 500 x 500.
> {code}
> java.lang.OutOfMemoryError: Java heap space
> at java.util.Arrays.copyOf(Arrays.java:2271)
> at java.io.ByteArrayOutputStream.grow(ByteArrayOutputStream.java:118)
> at java.io.ByteArrayOutputStream.ensureCapacity(ByteArrayOutputStream.java:93)
> at java.io.ByteArrayOutputStream.write(ByteArrayOutputStream.java:153)
> at java.io.ObjectOutputStream$BlockDataOutputStream.writeBlockHeader(ObjectOutputStream.java:1893)
> at java.io.ObjectOutputStream$BlockDataOutputStream.drain(ObjectOutputStream.java:1874)
> at java.io.ObjectOutputStream$BlockDataOutputStream.setBlockDataMode(ObjectOutputStream.java:1785)
> at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1188)
> at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547)
> at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508)
> at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431)
> at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177)
> at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347)
> at org.apache.flink.util.InstantiationUtil.serializeObject(InstantiationUtil.java:300)
> at org.apache.flink.util.InstantiationUtil.writeObjectToConfig(InstantiationUtil.java:252)
> at org.apache.flink.runtime.operators.util.TaskConfig.setStubWrapper(TaskConfig.java:273)
> at org.apache.flink.optimizer.plantranslate.JobGraphGenerator.createDataSourceVertex(JobGraphGenerator.java:893)
> at org.apache.flink.optimizer.plantranslate.JobGraphGenerator.preVisit(JobGraphGenerator.java:286)
> at org.apache.flink.optimizer.plantranslate.JobGraphGenerator.preVisit(JobGraphGenerator.java:109)
> at org.apache.flink.optimizer.plan.SourcePlanNode.accept(SourcePlanNode.java:86)
> at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> at org.apache.flink.optimizer.plan.OptimizedPlan.accept(OptimizedPlan.java:128)
> at org.apache.flink.optimizer.plantranslate.JobGraphGenerator.compileJobGraph(JobGraphGenerator.java:188)
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)