You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by KhajaAsmath Mohammed <md...@gmail.com> on 2018/01/18 04:39:19 UTC

Spark Stream is corrupted

Hi,

I have created a streaming context from a checkpoint, but it always throws a
"stream is corrupted" error when I restart the Spark Streaming job. Is there
any solution for this?

/**
 * Builds a fresh StreamingContext wired to a direct Kafka stream.
 *
 * Steps, in order:
 *  1. reads the comma-separated topic list and the broker list from `config`;
 *  2. creates a StreamingContext on `sparkSession.sparkContext` with a batch
 *     interval of `batchDuration` seconds;
 *  3. opens a direct Kafka stream, keeps the second element of each record
 *     tuple and parses it with `TransformDatapoint.parseDataPointText`;
 *  4. assigns `hiveDBInstance` (a variable declared outside this method) from
 *     the "hiveDBInstance" config key;
 *  5. hands the parsed stream to `TransformDatapoint.readDstreamData`;
 *  6. sets the checkpoint directory on the context.
 *
 * @param sparkCheckpointDir directory passed to `StreamingContext.checkpoint`
 * @param sparkSession       session whose SparkContext and SQLContext are used
 * @param batchDuration      batch interval, in seconds
 * @param config             Typesafe config holding the Kafka and Hive settings
 * @return the configured (not yet started) StreamingContext
 */
private def createStreamingContext(
    sparkCheckpointDir: String,
    sparkSession: SparkSession,
    batchDuration: Int,
    config: com.typesafe.config.Config) = {
  // Kafka subscription settings.
  val subscribedTopics = config.getString(Constants.Properties.KafkaTopics).split(",").toSet
  val brokerList = config.getString(Constants.Properties.KafkaBrokerList)
  val consumerParams = Map[String, String]("metadata.broker.list" -> brokerList)

  val streamingCtx = new StreamingContext(sparkSession.sparkContext, Seconds(batchDuration))

  // Direct stream of (key, value) records -> value -> parsed datapoint.
  val datapoints = KafkaUtils
    .createDirectStream[String, String, StringDecoder, StringDecoder](
      streamingCtx, consumerParams, subscribedTopics)
    .map(_._2)
    .map(TransformDatapoint.parseDataPointText)

  // Must be assigned before readDstreamData, which receives it as an argument.
  hiveDBInstance = config.getString("hiveDBInstance")

  TransformDatapoint.readDstreamData(
    sparkSession,
    sparkSession.sqlContext,
    datapoints,
    runMode,
    includeIndex,
    indexNum,
    datapointTmpTableName,
    fencedDPTmpTableName,
    fencedVINDPTmpTableName,
    hiveDBInstance)

  streamingCtx.checkpoint(sparkCheckpointDir)
  streamingCtx
}



// Calling the streaming context factory method

 val streamingContext = {
   // Factory passed to getOrCreate as its context-creating function; it builds
   // a new context from the configured checkpoint directory and batch interval.
   val buildContext: () => StreamingContext = () =>
     createStreamingContext(
       config.getString(Constants.Properties.CheckPointDir),
       sparkSession,
       config.getInt(Constants.Properties.BatchInterval),
       config)

   StreamingContext.getOrCreate(
     config.getString(Constants.Properties.CheckPointDir),
     buildContext)
 }

*ERROR:*
org.apache.spark.SparkException: Failed to read checkpoint from directory
hdfs://prodnameservice1/user/yyy1k78/KafkaCheckPointNTDSC

java.io.IOException: Stream is corrupted


Thanks,
Asmath

Re: Spark Stream is corrupted

Posted by KhajaAsmath Mohammed <md...@gmail.com>.
Are there any solutions for this problem, please?

Sent from my iPhone

> On Jan 17, 2018, at 10:39 PM, KhajaAsmath Mohammed <md...@gmail.com> wrote:
> 
> Hi,
> 
> I have created a streaming object from checkpoint but it always through up error as stream corrupted when I restart spark streaming job. any solution for this?
> 
> private def createStreamingContext(
>     sparkCheckpointDir: String, sparkSession: SparkSession,
>     batchDuration: Int, config: com.typesafe.config.Config) = {
>     val topics = config.getString(Constants.Properties.KafkaTopics)
>     val topicsSet = topics.split(",").toSet
>     val kafkaParams = Map[String, String]("metadata.broker.list" -> config.getString(Constants.Properties.KafkaBrokerList))
>     val ssc = new StreamingContext(sparkSession.sparkContext, Seconds(batchDuration))
>     val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet)
>     val datapointDStream = messages.map(_._2).map(TransformDatapoint.parseDataPointText)
>     lazy val sqlCont = sparkSession.sqlContext
> 
>     hiveDBInstance = config.getString("hiveDBInstance")
> 
>     TransformDatapoint.readDstreamData(sparkSession, sqlCont, datapointDStream, runMode, includeIndex, indexNum, datapointTmpTableName, fencedDPTmpTableName, fencedVINDPTmpTableName, hiveDBInstance)
> 
>     ssc.checkpoint(sparkCheckpointDir)
>     ssc
>   }
> 
> 
> 
> // calling streming context method
> 
>  val streamingContext = StreamingContext.getOrCreate(config.getString(Constants.Properties.CheckPointDir), () => createStreamingContext(config.getString(Constants.Properties.CheckPointDir), sparkSession, config.getInt(Constants.Properties.BatchInterval), config))
> 
> ERROR:
> org.apache.spark.SparkException: Failed to read checkpoint from directory hdfs://prodnameservice1/user/yyy1k78/KafkaCheckPointNTDSC
> 
> java.io.IOException: Stream is corrupted
> 
> 
> Thanks,
> Asmath