You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Guoqiang Li (JIRA)" <ji...@apache.org> on 2014/08/20 10:22:26 UTC

[jira] [Resolved] (SPARK-934) spark-mllib occasionally throws java.io.IOException (java.io.IOException: Corrupt data: overrun in decompress, input offset 51381, output offset 57509)

     [ https://issues.apache.org/jira/browse/SPARK-934?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Guoqiang Li resolved SPARK-934.
-------------------------------

    Resolution: Fixed

> spark-mllib occasionally throws java.io.IOException (java.io.IOException: Corrupt data: overrun in decompress, input offset 51381, output offset 57509)
> ------------------------------------------------------------------------------------------------------------------------------------------------------
>
>                 Key: SPARK-934
>                 URL: https://issues.apache.org/jira/browse/SPARK-934
>             Project: Spark
>          Issue Type: Bug
>    Affects Versions: 0.9.0
>            Reporter: Guoqiang Li
>
> java.io.IOException (java.io.IOException: Corrupt data: overrun in decompress, input offset 51381, output offset 57509)
> com.ning.compress.lzf.LZFDecoder.decompressChunk(LZFDecoder.java:346)
> com.ning.compress.lzf.LZFDecoder.decompressChunk(LZFDecoder.java:192)
> com.ning.compress.lzf.LZFInputStream.readyBuffer(LZFInputStream.java:254)
> com.ning.compress.lzf.LZFInputStream.read(LZFInputStream.java:129)
> java.io.ObjectInputStream$PeekInputStream.read(ObjectInputStream.java:2309)
> java.io.ObjectInputStream$PeekInputStream.readFully(ObjectInputStream.java:2322)
> java.io.ObjectInputStream$BlockDataInputStream.readDoubles(ObjectInputStream.java:3012)
> java.io.ObjectInputStream.readArray(ObjectInputStream.java:1691)
> java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1342)
> java.io.ObjectInputStream.readArray(ObjectInputStream.java:1704)
> java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1342)
> java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1989)
> java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1913)
> java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1796)
> java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1348)
> java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1989)
> java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1913)
> java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1796)
> java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1348)
> java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
> org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:39)
> org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:101)
> org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
> scala.collection.Iterator$$anon$21.hasNext(Iterator.scala:440)
> org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:26)
> org.apache.spark.Aggregator.combineValuesByKey(Aggregator.scala:40)
> org.apache.spark.rdd.PairRDDFunctions$$anonfun$combineByKey$3.apply(PairRDDFunctions.scala:98)
> org.apache.spark.rdd.PairRDDFunctions$$anonfun$combineByKey$3.apply(PairRDDFunctions.scala:98)
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:36)
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
> org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:121)
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:118)
> scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:34)
> scala.collection.mutable.ArrayOps.foreach(ArrayOps.scala:38)
> org.apache.spark.rdd.CoGroupedRDD.compute(CoGroupedRDD.scala:118)
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
> org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
> org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:32)
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
> org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
> org.apache.spark.rdd.FlatMappedValuesRDD.compute(FlatMappedValuesRDD.scala:32)
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
> org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
> org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:32)
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
> org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
> org.apache.spark.rdd.FlatMappedValuesRDD.compute(FlatMappedValuesRDD.scala:32)
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
> org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:36)
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:237)
> org.apache.spark.rdd.RDD.iterator(RDD.scala:226)
> org.apache.spark.scheduler.ResultTask.run(ResultTask.scala:99)
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:158)
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
> java.lang.Thread.run(Thread.java:724)



--
This message was sent by Atlassian JIRA
(v6.2#6252)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org