Posted to user@spark.apache.org by Abhinav Gundapaneni <ag...@microsoft.com.INVALID> on 2021/12/17 07:09:20 UTC

Unable to use WriteStream to write to delta file.

Hello Spark community,

I’m using Apache Spark (version 3.2) to read a CSV file into a dataframe using readStream, process the dataframe, and write it out in Delta format using writeStream. I’m getting a failure during the writeStream step. I’m running the script locally on my Windows 11 machine. Below is the stack trace of the error I’m facing. Please let me know if there’s anything that I’m missing.
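
For reference, the pipeline looks roughly like the sketch below (the paths, schema and column names here are simplified placeholders, not the exact code from my script):

    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType, StructField, StringType, IntegerType

    spark = SparkSession.builder.appName("csv-to-delta").getOrCreate()

    # Streaming CSV sources need an explicit schema (placeholder columns here).
    schema = StructType([
        StructField("student_id", StringType()),
        StructField("school_year", StringType()),
        StructField("days_present", IntegerType()),
    ])

    # Read the incoming CSV files as a stream.
    df = (spark.readStream
          .format("csv")
          .option("header", "true")
          .schema(schema)
          .load("input/studentattendance/"))

    # ... transformations on df go here ...

    # Write the stream out in Delta format; a checkpoint location is required.
    query = (df.writeStream
             .format("delta")
             .option("checkpointLocation", "output/_checkpoints/studentattendance")
             .outputMode("append")
             .start("output/studentattendance_delta"))

    query.awaitTermination()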

java.lang.NoSuchMethodError: org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter$.checkFieldNames(Lscala/collection/Seq;)V
        at org.apache.spark.sql.delta.schema.SchemaUtils$.checkFieldNames(SchemaUtils.scala:958)
        at org.apache.spark.sql.delta.OptimisticTransactionImpl.verifyNewMetadata(OptimisticTransaction.scala:216)
        at org.apache.spark.sql.delta.OptimisticTransactionImpl.verifyNewMetadata$(OptimisticTransaction.scala:214)
        at org.apache.spark.sql.delta.OptimisticTransaction.verifyNewMetadata(OptimisticTransaction.scala:80)
        at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadata(OptimisticTransaction.scala:208)
        at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadata$(OptimisticTransaction.scala:195)
        at org.apache.spark.sql.delta.OptimisticTransaction.updateMetadata(OptimisticTransaction.scala:80)
        at org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata(ImplicitMetadataOperation.scala:101)
        at org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata$(ImplicitMetadataOperation.scala:62)
        at org.apache.spark.sql.delta.sources.DeltaSink.updateMetadata(DeltaSink.scala:37)
        at org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata(ImplicitMetadataOperation.scala:59)
        at org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata$(ImplicitMetadataOperation.scala:50)
        at org.apache.spark.sql.delta.sources.DeltaSink.updateMetadata(DeltaSink.scala:37)
        at org.apache.spark.sql.delta.sources.DeltaSink.$anonfun$addBatch$1(DeltaSink.scala:80)
        at org.apache.spark.sql.delta.sources.DeltaSink.$anonfun$addBatch$1$adapted(DeltaSink.scala:54)
        at org.apache.spark.sql.delta.DeltaLog.withNewTransaction(DeltaLog.scala:188)
        at org.apache.spark.sql.delta.sources.DeltaSink.addBatch(DeltaSink.scala:54)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$17(MicroBatchExecution.scala:600)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
        at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$16(MicroBatchExecution.scala:598)
        at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
        at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
        at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runBatch(MicroBatchExecution.scala:598)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$2(MicroBatchExecution.scala:228)
        at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
        at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
        at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
        at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1(MicroBatchExecution.scala:193)
        at org.apache.spark.sql.execution.streaming.OneTimeExecutor.execute(TriggerExecutor.scala:39)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:187)
        at org.apache.spark.sql.execution.streaming.StreamExecution.$anonfun$runStream$1(StreamExecution.scala:303)
        at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:286)
        at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:209)
Exception in thread "stream execution thread for [id = aafe131a-0785-4285-8b5e-7735b30959a7, runId = effac477-1036-498e-961b-41e9b76c68df]" java.lang.NoSuchMethodError: org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter$.checkFieldNames(Lscala/collection/Seq;)V
        at org.apache.spark.sql.delta.schema.SchemaUtils$.checkFieldNames(SchemaUtils.scala:958)
        at org.apache.spark.sql.delta.OptimisticTransactionImpl.verifyNewMetadata(OptimisticTransaction.scala:216)
        at org.apache.spark.sql.delta.OptimisticTransactionImpl.verifyNewMetadata$(OptimisticTransaction.scala:214)
        at org.apache.spark.sql.delta.OptimisticTransaction.verifyNewMetadata(OptimisticTransaction.scala:80)
        at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadata(OptimisticTransaction.scala:208)
        at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadata$(OptimisticTransaction.scala:195)
        at org.apache.spark.sql.delta.OptimisticTransaction.updateMetadata(OptimisticTransaction.scala:80)
        at org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata(ImplicitMetadataOperation.scala:101)
        at org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata$(ImplicitMetadataOperation.scala:62)
        at org.apache.spark.sql.delta.sources.DeltaSink.updateMetadata(DeltaSink.scala:37)
        at org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata(ImplicitMetadataOperation.scala:59)
        at org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata$(ImplicitMetadataOperation.scala:50)
        at org.apache.spark.sql.delta.sources.DeltaSink.updateMetadata(DeltaSink.scala:37)
        at org.apache.spark.sql.delta.sources.DeltaSink.$anonfun$addBatch$1(DeltaSink.scala:80)
        at org.apache.spark.sql.delta.sources.DeltaSink.$anonfun$addBatch$1$adapted(DeltaSink.scala:54)
        at org.apache.spark.sql.delta.DeltaLog.withNewTransaction(DeltaLog.scala:188)
        at org.apache.spark.sql.delta.sources.DeltaSink.addBatch(DeltaSink.scala:54)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$17(MicroBatchExecution.scala:600)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
        at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$16(MicroBatchExecution.scala:598)
        at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
        at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
        at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runBatch(MicroBatchExecution.scala:598)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$2(MicroBatchExecution.scala:228)
        at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
        at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
        at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
        at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1(MicroBatchExecution.scala:193)
        at org.apache.spark.sql.execution.streaming.OneTimeExecutor.execute(TriggerExecutor.scala:39)
        at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:187)
        at org.apache.spark.sql.execution.streaming.StreamExecution.$anonfun$runStream$1(StreamExecution.scala:303)
        at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:286)
        at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:209)
Traceback (most recent call last):
  File "C:\Users\agundapaneni\Development\ModernDataEstate\tests\test_mdefbasic.py", line 60, in <module>
    obj.test_ingest_incremental_data_batch1()
  File "C:\Users\agundapaneni\Development\ModernDataEstate\tests\test_mdefbasic.py", line 56, in test_ingest_incremental_data_batch1
    mdef.ingest_incremental_data('example', entity, self.schemas['studentattendance'], 'school_year')
  File "C:\Users\agundapaneni\Development\ModernDataEstate/src\MDEFBasic.py", line 109, in ingest_incremental_data
    query.awaitTermination()   # block until query is terminated, with stop() or with error; A StreamingQueryException will be thrown if an exception occurs.
  File "C:\Users\agundapaneni\Development\ModernDataEstate\.tox\default\lib\site-packages\pyspark\sql\streaming.py", line 101, in awaitTermination
    return self._jsq.awaitTermination()
  File "C:\Users\agundapaneni\Development\ModernDataEstate\.tox\default\lib\site-packages\py4j\java_gateway.py", line 1309, in __call__
    return_value = get_return_value(
  File "C:\Users\agundapaneni\Development\ModernDataEstate\.tox\default\lib\site-packages\pyspark\sql\utils.py", line 117, in deco
    raise converted from None
pyspark.sql.utils.StreamingQueryException: org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter$.checkFieldNames(Lscala/collection/Seq;)V
=== Streaming Query ===

Re: Unable to use WriteStream to write to delta file.

Posted by Gourav Sengupta <go...@gmail.com>.
Hi,

Also, please ensure that you have read all the required documentation to
understand whether you need to do any metadata migration.


Regards,
Gourav Sengupta

On Sun, Dec 19, 2021 at 11:55 AM Alex Ott <al...@gmail.com> wrote:

> Make sure that you're using compatible version of Delta Lake library. For
> Spark 3.2 it's 1.1.0:
> https://github.com/delta-io/delta/releases/tag/v1.1.0
> For other Spark versions, check this table:
> https://docs.delta.io/latest/releases.html
>
> On Fri, Dec 17, 2021 at 2:36 PM Stelios Philippou <st...@gmail.com>
> wrote:
>
>> Hi Abhinav,
>>
>> Using ReadStream or Read will not mind.
>>
>> The following error
>> java.lang.NoSuchMethodError:
>> org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter$.checkFieldNames(
>>
>> states that you are using different version of Spark at someplace of your
>> project or you are using an older component
>>
>> Please check your Spark Binaries and as well as your pom that you are
>> indeed using the same versions.
>>
>> On Fri, 17 Dec 2021 at 15:11, Abhinav Gundapaneni
>> <ag...@microsoft.com.invalid> wrote:
>>
>>> Hello Spark community,
>>>
>>>
>>>
>>> I’m using Apache spark(version 3.2) to read a CSV file to a dataframe
>>> using ReadStream, process the dataframe and write the dataframe to Delta
>>> file using WriteStream. I’m getting a failure during the WriteStream
>>> process. I’m trying to run the script locally in my windows 11 machine.
>>> Below is the stack trace of the error I’m facing. Please let me know if
>>> there’s anything that I’m missing.

Re: Unable to use WriteStream to write to delta file.

Posted by Alex Ott <al...@gmail.com>.
Make sure that you're using a compatible version of the Delta Lake library. For
Spark 3.2 it's 1.1.0: https://github.com/delta-io/delta/releases/tag/v1.1.0
For other Spark versions, check this table:
https://docs.delta.io/latest/releases.html
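
If you install through pip, one way to keep the two in sync is to let the
delta-spark package pull in the matching delta-core jar. This is only a sketch,
assuming pyspark 3.2.x and delta-spark 1.1.0 are installed:

    # pip install pyspark==3.2.0 delta-spark==1.1.0
    from delta import configure_spark_with_delta_pip
    from pyspark.sql import SparkSession

    builder = (SparkSession.builder
               .appName("delta-compat-check")
               .config("spark.sql.extensions",
                       "io.delta.sql.DeltaSparkSessionExtension")
               .config("spark.sql.catalog.spark_catalog",
                       "org.apache.spark.sql.delta.catalog.DeltaCatalog"))

    # Adds the delta-core artifact matching the installed delta-spark version
    # to spark.jars.packages before the session starts.
    spark = configure_spark_with_delta_pip(builder).getOrCreate()

Alternatively you can put io.delta:delta-core_2.12:1.1.0 on the classpath
yourself via spark.jars.packages or --packages.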

On Fri, Dec 17, 2021 at 2:36 PM Stelios Philippou <st...@gmail.com>
wrote:

> Hi Abhinav,
>
> Using ReadStream or Read will not mind.
>
> The following error
> java.lang.NoSuchMethodError:
> org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter$.checkFieldNames(
>
> states that you are using different version of Spark at someplace of your
> project or you are using an older component
>
> Please check your Spark Binaries and as well as your pom that you are
> indeed using the same versions.
>
> On Fri, 17 Dec 2021 at 15:11, Abhinav Gundapaneni
> <ag...@microsoft.com.invalid> wrote:
>
>> Hello Spark community,
>>
>>
>>
>> I’m using Apache spark(version 3.2) to read a CSV file to a dataframe
>> using ReadStream, process the dataframe and write the dataframe to Delta
>> file using WriteStream. I’m getting a failure during the WriteStream
>> process. I’m trying to run the script locally in my windows 11 machine.
>> Below is the stack trace of the error I’m facing. Please let me know if
>> there’s anything that I’m missing.

-- 
With best wishes,                    Alex Ott
http://alexott.net/
Twitter: alexott_en (English), alexott (Russian)

Re: Unable to use WriteStream to write to delta file.

Posted by Stelios Philippou <st...@gmail.com>.
Hi Abhinav,

Using readStream or read will not make a difference here.

The following error
java.lang.NoSuchMethodError:
org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter$.checkFieldNames(

indicates that you are using a different version of Spark somewhere in your
project, or that you are using an older component.

Please check your Spark binaries as well as your pom to make sure that you are
indeed using the same versions.
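
One quick way to see what actually ends up together at runtime is to print the
versions from the same SparkSession the script uses (just a sketch):

    # Print the Spark version and the Delta packages/jars the session was
    # started with, so a mismatch (e.g. Spark 3.2 with a delta-core built
    # for Spark 3.1) is easy to spot.
    print("Spark version:", spark.version)
    conf = spark.sparkContext.getConf()
    print("spark.jars.packages:", conf.get("spark.jars.packages", "<not set>"))
    print("spark.jars:", conf.get("spark.jars", "<not set>"))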

On Fri, 17 Dec 2021 at 15:11, Abhinav Gundapaneni
<ag...@microsoft.com.invalid> wrote:

> Hello Spark community,
>
>
>
> I’m using Apache spark(version 3.2) to read a CSV file to a dataframe
> using ReadStream, process the dataframe and write the dataframe to Delta
> file using WriteStream. I’m getting a failure during the WriteStream
> process. I’m trying to run the script locally in my windows 11 machine.
> Below is the stack trace of the error I’m facing. Please let me know if
> there’s anything that I’m missing.

Re: Unable to use WriteStream to write to delta file.

Posted by bi...@bitfox.top.
May I ask why you don’t use spark.read and spark.write instead of
readStream and writeStream? Thanks.
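
For comparison, the batch version would look roughly like this (the paths are
just placeholders); note it has the same Spark/Delta version requirement when
writing in Delta format:

    # Plain batch read/write instead of readStream/writeStream.
    df = (spark.read
          .format("csv")
          .option("header", "true")
          .option("inferSchema", "true")
          .load("input/studentattendance/"))

    (df.write
       .format("delta")
       .mode("append")
       .save("output/studentattendance_delta"))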

On 2021-12-17 15:09, Abhinav Gundapaneni wrote:
> Hello Spark community,
> 
> I’m using Apache spark(version 3.2) to read a CSV file to a
> dataframe using ReadStream, process the dataframe and write the
> dataframe to Delta file using WriteStream. I’m getting a failure
> during the WriteStream process. I’m trying to run the script locally
> in my windows 11 machine. Below is the stack trace of the error I’m
> facing. Please let me know if there’s anything that I’m missing.
> 

---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscribe@spark.apache.org