You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@mahout.apache.org by "Suneel Marthi (JIRA)" <ji...@apache.org> on 2016/04/08 10:46:25 UTC

[jira] [Commented] (MAHOUT-1818) dals test failing in Flink-bindings

    [ https://issues.apache.org/jira/browse/MAHOUT-1818?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15231885#comment-15231885 ] 

Suneel Marthi commented on MAHOUT-1818:
---------------------------------------

We don't see the OOM anymore, but we now get the error below, which also comes from FlinkOpAtA.slim():

{Code}

04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(1/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(2/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(3/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(4/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(5/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(6/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(7/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.drm.RowsFlinkDrm.asBlockified(FlinkDrm.scala:52)) -> Map (Map at org.apache.mahout.flinkbindings.blas.FlinkOpMapBlock$.apply(FlinkOpMapBlock.scala:38)) -> FlatMap (FlatMap at org.apache.mahout.flinkbindings.drm.BlockifiedFlinkDrm.asRowWise(FlinkDrm.scala:93))(8/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(1/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(2/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(3/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(4/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(5/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(6/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(7/8) switched to CANCELED 
04/08/2016 03:52:13	CHAIN MapPartition (MapPartition at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:54)) -> Combine (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(8/8) switched to CANCELED 
04/08/2016 03:52:13	Reduce (Reduce at org.apache.mahout.flinkbindings.blas.FlinkOpAtA$.slim(FlinkOpAtA.scala:101))(1/1) switched to CANCELED 
04/08/2016 03:52:13	DataSink (org.apache.flink.api.java.Utils$CollectHelper@2c663246)(1/1) switched to CANCELED 
04/08/2016 03:52:13	Job execution switched to status FAILED.
- dals *** FAILED ***
  org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
  at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply$mcV$sp(JobManager.scala:716)
  at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:662)
  at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:662)
  at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
  at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
  at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:41)
  at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:401)
  at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
  at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.pollAndExecAll(ForkJoinPool.java:1253)
  at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1346)
  ...
  Cause: java.lang.IllegalStateException: unread block data
  at java.io.ObjectInputStream$BlockDataInputStream.setBlockDataMode(ObjectInputStream.java:2431)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1383)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
  at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
  at org.apache.flink.util.InstantiationUtil.deserializeObject(InstantiationUtil.java:290)
  at org.apache.flink.util.InstantiationUtil.readObjectFromConfig(InstantiationUtil.java:248)
  at org.apache.flink.runtime.operators.util.TaskConfig.getStubWrapper(TaskConfig.java:282)
  ...

{Code}

> dals test failing in Flink-bindings
> -----------------------------------
>
>                 Key: MAHOUT-1818
>                 URL: https://issues.apache.org/jira/browse/MAHOUT-1818
>             Project: Mahout
>          Issue Type: Bug
>          Components: Flink
>    Affects Versions: 0.11.2
>            Reporter: Andrew Palumbo
>            Assignee: Andrew Palumbo
>            Priority: Blocker
>             Fix For: 0.12.0
>
>
> The {{dals}} test fails in the Flink bindings with an OOM.  Numerically, the test passes when the matrix being decomposed in the test is reduced to size 50 x 50, but the default size of the matrix in {{DistributedDecompositionsSuiteBase}} is 500 x 500.
> {code}
> java.lang.OutOfMemoryError: Java heap space
> 	at java.util.Arrays.copyOf(Arrays.java:2271)
> 	at java.io.ByteArrayOutputStream.grow(ByteArrayOutputStream.java:118)
> 	at java.io.ByteArrayOutputStream.ensureCapacity(ByteArrayOutputStream.java:93)
> 	at java.io.ByteArrayOutputStream.write(ByteArrayOutputStream.java:153)
> 	at java.io.ObjectOutputStream$BlockDataOutputStream.writeBlockHeader(ObjectOutputStream.java:1893)
> 	at java.io.ObjectOutputStream$BlockDataOutputStream.drain(ObjectOutputStream.java:1874)
> 	at java.io.ObjectOutputStream$BlockDataOutputStream.setBlockDataMode(ObjectOutputStream.java:1785)
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1188)
> 	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547)
> 	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508)
> 	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431)
> 	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177)
> 	at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347)
> 	at org.apache.flink.util.InstantiationUtil.serializeObject(InstantiationUtil.java:300)
> 	at org.apache.flink.util.InstantiationUtil.writeObjectToConfig(InstantiationUtil.java:252)
> 	at org.apache.flink.runtime.operators.util.TaskConfig.setStubWrapper(TaskConfig.java:273)
> 	at org.apache.flink.optimizer.plantranslate.JobGraphGenerator.createDataSourceVertex(JobGraphGenerator.java:893)
> 	at org.apache.flink.optimizer.plantranslate.JobGraphGenerator.preVisit(JobGraphGenerator.java:286)
> 	at org.apache.flink.optimizer.plantranslate.JobGraphGenerator.preVisit(JobGraphGenerator.java:109)
> 	at org.apache.flink.optimizer.plan.SourcePlanNode.accept(SourcePlanNode.java:86)
> 	at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> 	at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> 	at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> 	at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> 	at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> 	at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> 	at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> 	at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> 	at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> 	at org.apache.flink.optimizer.plan.SingleInputPlanNode.accept(SingleInputPlanNode.java:199)
> 	at org.apache.flink.optimizer.plan.OptimizedPlan.accept(OptimizedPlan.java:128)
> 	at org.apache.flink.optimizer.plantranslate.JobGraphGenerator.compileJobGraph(JobGraphGenerator.java:188)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)