You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by Bj��rn J��rgensen <bj...@gmail.com> on 2021/08/30 13:07:49 UTC

Can���t write to PVC in K8S

Hi, I have built and running spark on k8s. A link to my repo https://github.com/bjornjorgensen/jlpyk8s

Everything seems to be running fine, but I can’t save to PVC. 
If I convert the dataframe to pandas, then I can save it. 



from pyspark.sql import SparkSession
spark = SparkSession.builder \
    .master("k8s://https://kubernetes.default.svc.cluster.local:443") \
    .config("spark.kubernetes.container.image", "bjornjorgensen/spark-py:v3.2-290821") \
    .config("spark.kubernetes.authenticate.caCertFile", "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") \
    .config("spark.kubernetes.authenticate.oauthTokenFile", "/var/run/secrets/kubernetes.io/serviceaccount/token") \
    .config("spark.kubernetes.authenticate.driver.serviceAccountName", "my-pyspark-notebook") \
    .config("spark.executor.instances", "10") \
    .config("spark.driver.host", "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
    .config("spark.driver.port", "29413") \
    .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName", "nfs100") \
    .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path", "/opt/spark/work-dir") \
    .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName", "nfs100") \
    .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path", "/opt/spark/work-dir") \
    .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly", "False") \
    .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly", "False") \
    .appName("myApp") \
    .config("spark.sql.repl.eagerEval.enabled", "True") \
    .config("spark.driver.memory", "4g") \
    .config("spark.executor.memory", "4g") \
    .getOrCreate()
sc = spark.sparkContext

pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")


21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
21/08/30 12:20:37 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in stage 25.0 (TID 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in stage 25.0 (TID 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in stage 25.0 (TID 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0 failed 4 times; aborting job
21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job d98cdc60-bb44-4189-b483-8449fc793658.
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 25.0 failed 4 times, most recent failure: Lost task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
	at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
/tmp/ipykernel_80/163396320.py in <module>
----> 1 pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")

/opt/spark/python/pyspark/pandas/frame.py in to_parquet(self, path, mode, partition_cols, compression, index_col, **options)
   4721         if compression is not None:
   4722             builder.option("compression", compression)
-> 4723         builder.options(**options).format("parquet").save(path)
   4724 
   4725     def to_orc(

/opt/spark/python/pyspark/sql/readwriter.py in save(self, path, format, mode, partitionBy, **options)
    738             self._jwrite.save()
    739         else:
--> 740             self._jwrite.save(path)
    741 
    742     @since(1.4)

/opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in __call__(self, *args)
   1307 
   1308         answer = self.gateway_client.send_command(command)
-> 1309         return_value = get_return_value(
   1310             answer, self.gateway_client, self.target_id, self.name)
   1311 

/opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
    109     def deco(*a, **kw):
    110         try:
--> 111             return f(*a, **kw)
    112         except py4j.protocol.Py4JJavaError as e:
    113             converted = convert_exception(e.java_exception)

/opt/conda/lib/python3.9/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    324             value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
    325             if answer[1] == REFERENCE_TYPE:
--> 326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
    328                     format(target_id, ".", name), value)

Py4JJavaError: An error occurred while calling o4804.save.
: org.apache.spark.SparkException: Job aborted.
	at org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
	at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 25.0 failed 4 times, most recent failure: Lost task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
	... 41 more
Caused by: java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)




df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet", mode="overwrite")


21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in stage 26.0 (TID 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in stage 26.0 (TID 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in stage 26.0 (TID 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in stage 26.0 (TID 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in stage 26.0 (TID 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.1 in stage 26.0 (TID 9553) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303560179987922074406_0026_m_000001_9553 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.2 in stage 26.0 (TID 9556) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906307237274992181823763_0026_m_000002_9556 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.1 in stage 26.0 (TID 9554) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306864123848918470508_0026_m_000004_9554 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.1 in stage 26.0 (TID 9555) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302601970797047480301_0026_m_000010_9555 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.0 in stage 26.0 (TID 9541) (10.42.192.9 executor 8): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304154332762277555982_0026_m_000000_9541 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.0 in stage 26.0 (TID 9548) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630694656629969727231_0026_m_000007_9548 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.2 in stage 26.0 (TID 9559) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303848774759656984701_0026_m_000004_9559 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.3 in stage 26.0 (TID 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 ERROR TaskSetManager: Task 2 in stage 26.0 failed 4 times; aborting job
21/08/29 19:06:31 WARN TaskSetManager: Lost task 1.2 in stage 26.0 (TID 9557) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302130961573080351978_0026_m_000001_9557 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 8.0 in stage 26.0 (TID 9549) (10.42.0.17 executor 6): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306776907150898092479_0026_m_000008_9549 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 5.0 in stage 26.0 (TID 9546) (10.42.96.9 executor 7): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906301725332653584503335_0026_m_000005_9546 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 11.0 in stage 26.0 (TID 9561) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302182889441465469285_0026_m_000011_9561 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.1 in stage 26.0 (TID 9563) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 ERROR FileFormatWriter: Aborting job 115cf3ce-5a9b-4274-8752-b6ead281f104.
org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 26.0 failed 4 times, most recent failure: Lost task 2.3 in stage 26.0 (TID 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
	at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
	at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)
21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.1 in stage 26.0 (TID 9562) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304432517406660432032_0026_m_000007_9562 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 9.0 in stage 26.0 (TID 9550) (10.42.128.10 executor 9): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305148471752983202631_0026_m_000009_9550 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

21/08/29 19:06:31 WARN TaskSetManager: Lost task 14.0 in stage 26.0 (TID 9566) (10.42.96.9 executor 7): TaskKilled (Stage cancelled)
21/08/29 19:06:31 WARN TaskSetManager: Lost task 6.0 in stage 26.0 (TID 9547) (10.42.32.12 executor 11): TaskKilled (Stage cancelled)
21/08/29 19:06:31 WARN TaskSetManager: Lost task 13.0 in stage 26.0 (TID 9565) (10.42.0.17 executor 6): TaskKilled (Stage cancelled)
21/08/29 19:06:31 WARN TaskSetManager: Lost task 12.0 in stage 26.0 (TID 9564) (10.42.0.12 executor 3): TaskKilled (Stage cancelled)
21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.2 in stage 26.0 (TID 9560) (10.42.192.9 executor 8): TaskKilled (Stage cancelled)
[Stage 26:>                                                       (0 + 1) / 132]
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
/tmp/ipykernel_80/610855484.py in <module>
----> 1 df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet", mode="overwrite")

/opt/spark/python/pyspark/sql/readwriter.py in parquet(self, path, mode, partitionBy, compression)
    883             self.partitionBy(partitionBy)
    884         self._set_opts(compression=compression)
--> 885         self._jwrite.parquet(path)
    886 
    887     def text(self, path, compression=None, lineSep=None):

/opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in __call__(self, *args)
   1307 
   1308         answer = self.gateway_client.send_command(command)
-> 1309         return_value = get_return_value(
   1310             answer, self.gateway_client, self.target_id, self.name)
   1311 

/opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
    109     def deco(*a, **kw):
    110         try:
--> 111             return f(*a, **kw)
    112         except py4j.protocol.Py4JJavaError as e:
    113             converted = convert_exception(e.java_exception)

/opt/conda/lib/python3.9/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    324             value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
    325             if answer[1] == REFERENCE_TYPE:
--> 326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
    328                     format(target_id, ".", name), value)

Py4JJavaError: An error occurred while calling o15435.parquet.
: org.apache.spark.SparkException: Job aborted.
	at org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
	at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
	at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 26.0 failed 4 times, most recent failure: Lost task 2.3 in stage 26.0 (TID 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
	... 42 more
Caused by: java.io.IOException: Mkdirs failed to create file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558 (exists=false, cwd=file:/opt/spark/work-dir)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
	at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
	at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
	at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.base/java.lang.Thread.run(Unknown Source)
21/08/29 19:06:31 WARN TaskSetManager: Lost task 3.0 in stage 26.0 (TID 9544) (10.42.0.18 executor 12): TaskKilled (Stage cancelled)










---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscribe@spark.apache.org


Re: Can’t write to PVC in K8S

Posted by Mich Talebzadeh <mi...@gmail.com>.
Hi Bjorn,

OK you are doing ETL with Spark


   1. Read data from somewhere
   2. Do something with that data in PySpark
   3. Write the result set somewhere


If you were doing this ETL with yarn or local mode how would this progress,
assuming you don't use K8


   view my Linkedin profile
<https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>



*Disclaimer:* Use it at your own risk. Any and all responsibility for any
loss, damage or destruction of data or any other property which may arise
from relying on this email's technical content is explicitly disclaimed.
The author will in no case be liable for any monetary damages arising from
such loss, damage or destruction.




On Thu, 2 Sept 2021 at 22:03, Bjørn Jørgensen <bj...@gmail.com>
wrote:

> Well, I have tried almost everything the last 2 days now.
>
> There is no user spark, and whatever I do with the executor image it only
> runs for 2 minutes in k8s and then restarts.
>
>
> The problem seems to be the nogroup that is writing files from executors.
> drwxr-xr-x  2    185 nogroup    4096 Sep  2 18:43 test14
>
>
> So is there anything that I can do with that? Or should I move on to minio
> or something else?
> I need to ETL 500 K - 94 GB of json files and save them somewhere.
>
> On 2021/08/31 21:09:25, Mich Talebzadeh <mi...@gmail.com>
> wrote:
> > I think Holden alluded to that.
> >
> > In a nutshell, users in Linux can belong to more than one group. In this
> > case you want to create a new group newgroup and add two users to that
> > group.Do this in the docker file as USER 0
> >
> > RUN groupadd newgroup
> > ## Now add the two users (these users need to exist)
> > RUN usermod -a -G newgroup jovyan
> > RUN usermod -a -G newgroup spark
> > ## set permission on the directory
> > RUN chgrp -R newgroup /path/to/the/directory
> > RUN chmod -R 770 /path/to/the/directory
> >
> > Check this thread as well
> >
> >
> https://superuser.com/questions/280994/give-write-permissions-to-multiple-users-on-a-folder-in-ubuntu
> >
> > HTH
> >
> >
> >
> >    view my Linkedin profile
> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> >
> >
> >
> > *Disclaimer:* Use it at your own risk. Any and all responsibility for any
> > loss, damage or destruction of data or any other property which may arise
> > from relying on this email's technical content is explicitly disclaimed.
> > The author will in no case be liable for any monetary damages arising
> from
> > such loss, damage or destruction.
> >
> >
> >
> >
> > On Tue, 31 Aug 2021 at 20:50, Holden Karau <ho...@pigscanfly.ca> wrote:
> >
> > > You can change the UID of one of them to match, or you could add them
> both
> > > to a group and set permissions to 770.
> > >
> > > On Tue, Aug 31, 2021 at 12:18 PM Bjørn Jørgensen <
> bjornjorgensen@gmail.com>
> > > wrote:
> > >
> > >> Hi and thanks for all the good help.
> > >>
> > >> I will build jupyter on top of spark to be able to run jupyter in
> local
> > >> mode with the new koalas library. The new koalas library can be
> imported as
> > >> "from pyspark import pandas as ps".
> > >>
> > >> Then you can run spark on K8S the same way that you use pandas in a
> > >> notebook.
> > >>
> > >> The easiest way to get a PV in K8S is with NFS. And with NFS you will
> > >> find your files outside K8S without having to copy files out of a K8S
> PVC.
> > >>
> > >> With this setup I can use pandas code in a notebook with the power
> from a
> > >> K8S cluster, as a normal notebook with pandas code.
> > >> I hope that this project will be a easy way to convert from pandas to
> > >> spark on K8S.
> > >>
> > >>
> > >> I did some testing to day with file permission. Like  RUN mkdir -p
> > >> /home/files and RUN chmod g+w /home/files
> > >> But
> > >>
> > >> 185@myapp-38a8887b9cedae97-exec-1:~/work-dir$ id
> > >> uid=185(185) gid=0(root) groups=0(root)
> > >>
> > >>
> > >> jovyan@my-pyspark-notebook-f6d497958-t9rpk:~$ id
> > >> uid=1000(jovyan) gid=100(users) groups=100(users)
> > >>
> > >> so it did't work.
> > >>
> > >> What will be the best way to make jovyan and 185 write to the same
> > >> folder?
> > >> On 2021/08/30 23:00:40, Mich Talebzadeh <mi...@gmail.com>
> > >> wrote:
> > >> > To be specific uid=185 (spark user, AKA anonymous) and root are in
> the
> > >> same
> > >> > group in the docker image itself
> > >> >
> > >> >
> > >> > id
> > >> >
> > >> > uid=185(185) gid=0(root) groups=0(root)
> > >> >
> > >> >
> > >> > So in the docker image conf file, you can create your permanent
> > >> directory
> > >> > as root off /home say
> > >> >
> > >> > do it as root (USER 0)
> > >> >
> > >> >
> > >> > RUN mkdir -p /home/<MY-DIR>
> > >> >
> > >> > RUN chmod g+w /home/<MY-DIR>  ## give write permission to spark
> > >> >
> > >> >
> > >> > ARG spark_uid=185
> > >> > ..................
> > >> >
> > >> > # Specify the User that the actual main process will run as
> > >> >
> > >> > USER ${spark_uid}
> > >> >
> > >> >
> > >> >    view my Linkedin profile
> > >> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> > >> >
> > >> >
> > >> >
> > >> > *Disclaimer:* Use it at your own risk. Any and all responsibility
> for
> > >> any
> > >> > loss, damage or destruction of data or any other property which may
> > >> arise
> > >> > from relying on this email's technical content is explicitly
> disclaimed.
> > >> > The author will in no case be liable for any monetary damages
> arising
> > >> from
> > >> > such loss, damage or destruction.
> > >> >
> > >> >
> > >> >
> > >> >
> > >> > On Mon, 30 Aug 2021 at 22:26, Mich Talebzadeh <
> > >> mich.talebzadeh@gmail.com>
> > >> > wrote:
> > >> >
> > >> > > Forgot to mention that Spark uses that work directory to unzip the
> > >> zipped
> > >> > > files or gunzip archive files
> > >> > >
> > >> > > For example
> > >> > >
> > >> > > pyFiles
> > >>  gs://axial-glow-224522-spark-on-k8s/codes/DSBQ.zip
> > >> > >
> > >> > >
> > >> > > Spark will use that $SPARK_HOME/work-dir to unzip DSBQ.zip which
> is
> > >> the
> > >> > > application package here
> > >> > >
> > >> > >
> > >> > > The alternative is to hack the docker file to create a directory
> for
> > >> > > yourself
> > >> > >
> > >> > >
> > >> > > RUN mkdir -p /home/conf
> > >> > >
> > >> > > RUN chmod g+w /home/conf
> > >> > >
> > >> > >
> > >> > > HTH
> > >> > >
> > >> > >
> > >> > > *Disclaimer:* Use it at your own risk. Any and all responsibility
> for
> > >> any
> > >> > > loss, damage or destruction of data or any other property which
> may
> > >> arise
> > >> > > from relying on this email's technical content is explicitly
> > >> disclaimed.
> > >> > > The author will in no case be liable for any monetary damages
> arising
> > >> from
> > >> > > such loss, damage or destruction.
> > >> > >
> > >> > >
> > >> > >
> > >> > >
> > >> > >
> > >> > >
> > >> > > On Mon, 30 Aug 2021 at 22:13, Mich Talebzadeh <
> > >> mich.talebzadeh@gmail.com>
> > >> > > wrote:
> > >> > >
> > >> > >> I am not familiar with  jupyterlab  so cannot comment on that.
> > >> > >>
> > >> > >> However, once your parquet file is written to the work-dir, how
> are
> > >> you
> > >> > >> going to utilise it?
> > >> > >>
> > >> > >> HTH
> > >> > >>
> > >> > >>
> > >> > >>
> > >> > >>
> > >> > >>    view my Linkedin profile
> > >> > >> <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> > >> > >>
> > >> > >>
> > >> > >>
> > >> > >> *Disclaimer:* Use it at your own risk. Any and all
> responsibility for
> > >> > >> any loss, damage or destruction of data or any other property
> which
> > >> may
> > >> > >> arise from relying on this email's technical content is
> explicitly
> > >> > >> disclaimed. The author will in no case be liable for any monetary
> > >> damages
> > >> > >> arising from such loss, damage or destruction.
> > >> > >>
> > >> > >>
> > >> > >>
> > >> > >>
> > >> > >> On Mon, 30 Aug 2021 at 22:05, Bjørn Jørgensen <
> > >> bjornjorgensen@gmail.com>
> > >> > >> wrote:
> > >> > >>
> > >> > >>> ok, so when I use spark on k8s I can only save files to s3
> buckets
> > >> or to
> > >> > >>> a database?
> > >> > >>>
> > >> > >>> Note my setup, its spark with jupyterlab on top on k8s.
> > >> > >>>
> > >> > >>> What are those for if I cant write files from spark in k8s to
> disk?
> > >> > >>>
> > >> > >>>
> > >>
> "spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > >> > >>> "False"
> > >> > >>>
> > >>
> "spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > >> > >>> "False"
> > >> > >>>
> > >> > >>> On 2021/08/30 20:50:22, Mich Talebzadeh <
> mich.talebzadeh@gmail.com>
> > >> > >>> wrote:
> > >> > >>> > Hi,
> > >> > >>> >
> > >> > >>> > You are trying to write to work-dir inside the docker and
> create
> > >> > >>> > sub-directories:
> > >> > >>> >
> > >> > >>> > The error you are getting is this
> > >> > >>> >
> > >> > >>> > Mkdirs failed to create
> > >> > >>> >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
> > >> > >>> > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> >
> > >> > >>> > That directory /work-dir is not recognised as a valid
> directory
> > >> > >>> > for storage. It is not in HDFS or HCFS format
> > >> > >>> >
> > >> > >>> >
> > >> > >>> > From Spark you can write to a bucket outside as a permanent
> > >> storage.
> > >> > >>> >
> > >> > >>> > HTH
> > >> > >>> >
> > >> > >>> >
> > >> > >>> >    view my Linkedin profile
> > >> > >>> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> > >> > >>> >
> > >> > >>> >
> > >> > >>> >
> > >> > >>> > *Disclaimer:* Use it at your own risk. Any and all
> responsibility
> > >> for
> > >> > >>> any
> > >> > >>> > loss, damage or destruction of data or any other property
> which
> > >> may
> > >> > >>> arise
> > >> > >>> > from relying on this email's technical content is explicitly
> > >> > >>> disclaimed.
> > >> > >>> > The author will in no case be liable for any monetary damages
> > >> arising
> > >> > >>> from
> > >> > >>> > such loss, damage or destruction.
> > >> > >>> >
> > >> > >>> >
> > >> > >>> >
> > >> > >>> >
> > >> > >>> > On Mon, 30 Aug 2021 at 14:11, Bjørn Jørgensen <
> > >> > >>> bjornjorgensen@gmail.com>
> > >> > >>> > wrote:
> > >> > >>> >
> > >> > >>> > > Hi, I have built and running spark on k8s. A link to my repo
> > >> > >>> > > https://github.com/bjornjorgensen/jlpyk8s
> > >> > >>> > >
> > >> > >>> > > Everything seems to be running fine, but I can’t save to
> PVC.
> > >> > >>> > > If I convert the dataframe to pandas, then I can save it.
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>> > > from pyspark.sql import SparkSession
> > >> > >>> > > spark = SparkSession.builder \
> > >> > >>> > >     .master("k8s://
> > >> https://kubernetes.default.svc.cluster.local:443")
> > >> > >>> \
> > >> > >>> > >     .config("spark.kubernetes.container.image",
> > >> > >>> > > "bjornjorgensen/spark-py:v3.2-290821") \
> > >> > >>> > >     .config("spark.kubernetes.authenticate.caCertFile",
> > >> > >>> "/var/run/secrets/
> > >> > >>> > > kubernetes.io/serviceaccount/ca.crt") \
> > >> > >>> > >     .config("spark.kubernetes.authenticate.oauthTokenFile",
> > >> > >>> > > "/var/run/secrets/kubernetes.io/serviceaccount/token") \
> > >> > >>> > >
> > >> > >>>
> .config("spark.kubernetes.authenticate.driver.serviceAccountName",
> > >> > >>> > > "my-pyspark-notebook") \
> > >> > >>> > >     .config("spark.executor.instances", "10") \
> > >> > >>> > >     .config("spark.driver.host",
> > >> > >>> > >
> "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
> > >> > >>> > >     .config("spark.driver.port", "29413") \
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>>
> > >>
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName",
> > >> > >>> > > "nfs100") \
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>>
> > >>
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path",
> > >> > >>> > > "/opt/spark/work-dir") \
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>>
> > >>
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName",
> > >> > >>> > > "nfs100") \
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>>
> > >>
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path",
> > >> > >>> > > "/opt/spark/work-dir") \
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>>
> > >>
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > >> > >>> > > "False") \
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>>
> > >>
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > >> > >>> > > "False") \
> > >> > >>> > >     .appName("myApp") \
> > >> > >>> > >     .config("spark.sql.repl.eagerEval.enabled", "True") \
> > >> > >>> > >     .config("spark.driver.memory", "4g") \
> > >> > >>> > >     .config("spark.executor.memory", "4g") \
> > >> > >>> > >     .getOrCreate()
> > >> > >>> > > sc = spark.sparkContext
> > >> > >>> > >
> > >> > >>> > >
> pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for
> > >> Window
> > >> > >>> > > operation! Moving all data to a single partition, this can
> cause
> > >> > >>> serious
> > >> > >>> > > performance degradation.
> > >> > >>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for
> > >> Window
> > >> > >>> > > operation! Moving all data to a single partition, this can
> cause
> > >> > >>> serious
> > >> > >>> > > performance degradation.
> > >> > >>> > > 21/08/30 12:20:37 WARN WindowExec: No Partition Defined for
> > >> Window
> > >> > >>> > > operation! Moving all data to a single partition, this can
> cause
> > >> > >>> serious
> > >> > >>> > > performance degradation.
> > >> > >>> > > 21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in
> stage
> > >> 25.0
> > >> > >>> (TID
> > >> > >>> > > 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs
> > >> failed to
> > >> > >>> create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>> > > 21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in
> stage
> > >> 25.0
> > >> > >>> (TID
> > >> > >>> > > 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
> > >> failed to
> > >> > >>> > > create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>> > > 21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in
> stage
> > >> 25.0
> > >> > >>> (TID
> > >> > >>> > > 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs
> > >> failed to
> > >> > >>> > > create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>> > > 21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in
> stage
> > >> 25.0
> > >> > >>> (TID
> > >> > >>> > > 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs
> > >> failed
> > >> > >>> to
> > >> > >>> > > create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>> > > 21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0
> > >> failed 4
> > >> > >>> > > times; aborting job
> > >> > >>> > > 21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job
> > >> > >>> > > d98cdc60-bb44-4189-b483-8449fc793658.
> > >> > >>> > > org.apache.spark.SparkException: Job aborted due to stage
> > >> failure:
> > >> > >>> Task 0
> > >> > >>> > > in stage 25.0 failed 4 times, most recent failure: Lost task
> > >> 0.3 in
> > >> > >>> stage
> > >> > >>> > > 25.0 (TID 9500) (10.42.32.15 executor 10):
> java.io.IOException:
> > >> > >>> Mkdirs
> > >> > >>> > > failed to create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>> > > Driver stacktrace:
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> > >> > >>> > >         at scala.Option.foreach(Option.scala:407)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> > >> > >>> > >         at
> > >> > >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> > >> > >>> > >         at
> > >> > >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> > >> > >>> > >         at
> > >> > >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > >> > >>> > >
> > >> > >>>
> > >>
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > >> > >>> > > Method)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> > >> > >>> > >         at
> > >> java.base/java.lang.reflect.Method.invoke(Method.java:566)
> > >> > >>> > >         at
> > >> > >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> > >> > >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> > >> > >>> > >         at
> > >> py4j.commands.CallCommand.execute(CallCommand.java:79)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> > >> > >>> > >         at
> > >> > >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> > >> > >>> > > Caused by: java.io.IOException: Mkdirs failed to create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>>
> > >>
> ---------------------------------------------------------------------------
> > >> > >>> > > Py4JJavaError                             Traceback (most
> recent
> > >> > >>> call last)
> > >> > >>> > > /tmp/ipykernel_80/163396320.py in <module>
> > >> > >>> > > ----> 1
> > >> > >>> pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> > >> > >>> > >
> > >> > >>> > > /opt/spark/python/pyspark/pandas/frame.py in
> to_parquet(self,
> > >> path,
> > >> > >>> mode,
> > >> > >>> > > partition_cols, compression, index_col, **options)
> > >> > >>> > >    4721         if compression is not None:
> > >> > >>> > >    4722             builder.option("compression",
> compression)
> > >> > >>> > > -> 4723
> > >> > >>>  builder.options(**options).format("parquet").save(path)
> > >> > >>> > >    4724
> > >> > >>> > >    4725     def to_orc(
> > >> > >>> > >
> > >> > >>> > > /opt/spark/python/pyspark/sql/readwriter.py in save(self,
> path,
> > >> > >>> format,
> > >> > >>> > > mode, partitionBy, **options)
> > >> > >>> > >     738             self._jwrite.save()
> > >> > >>> > >     739         else:
> > >> > >>> > > --> 740             self._jwrite.save(path)
> > >> > >>> > >     741
> > >> > >>> > >     742     @since(1.4)
> > >> > >>> > >
> > >> > >>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py
> in
> > >> > >>> > > __call__(self, *args)
> > >> > >>> > >    1307
> > >> > >>> > >    1308         answer =
> > >> self.gateway_client.send_command(command)
> > >> > >>> > > -> 1309         return_value = get_return_value(
> > >> > >>> > >    1310             answer, self.gateway_client,
> self.target_id,
> > >> > >>> self.name
> > >> > >>> > > )
> > >> > >>> > >    1311
> > >> > >>> > >
> > >> > >>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
> > >> > >>> > >     109     def deco(*a, **kw):
> > >> > >>> > >     110         try:
> > >> > >>> > > --> 111             return f(*a, **kw)
> > >> > >>> > >     112         except py4j.protocol.Py4JJavaError as e:
> > >> > >>> > >     113             converted =
> > >> convert_exception(e.java_exception)
> > >> > >>> > >
> > >> > >>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
> > >> > >>> > > get_return_value(answer, gateway_client, target_id, name)
> > >> > >>> > >     324             value =
> OUTPUT_CONVERTER[type](answer[2:],
> > >> > >>> > > gateway_client)
> > >> > >>> > >     325             if answer[1] == REFERENCE_TYPE:
> > >> > >>> > > --> 326                 raise Py4JJavaError(
> > >> > >>> > >     327                     "An error occurred while calling
> > >> > >>> {0}{1}{2}.\n".
> > >> > >>> > >     328                     format(target_id, ".", name),
> value)
> > >> > >>> > >
> > >> > >>> > > Py4JJavaError: An error occurred while calling o4804.save.
> > >> > >>> > > : org.apache.spark.SparkException: Job aborted.
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> > >> > >>> > >         at
> > >> > >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > >> > >>> > >
> > >> > >>>
> > >>
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > >> > >>> > > Method)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> > >> > >>> > >         at
> > >> java.base/java.lang.reflect.Method.invoke(Method.java:566)
> > >> > >>> > >         at
> > >> > >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> > >> > >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> > >> > >>> > >         at
> > >> py4j.commands.CallCommand.execute(CallCommand.java:79)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> > >> > >>> > >         at
> > >> > >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> > >> > >>> > > Caused by: org.apache.spark.SparkException: Job aborted due
> to
> > >> stage
> > >> > >>> > > failure: Task 0 in stage 25.0 failed 4 times, most recent
> > >> failure:
> > >> > >>> Lost
> > >> > >>> > > task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10):
> > >> > >>> > > java.io.IOException: Mkdirs failed to create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>> > > Driver stacktrace:
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> > >> > >>> > >         at scala.Option.foreach(Option.scala:407)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> > >> > >>> > >         at
> > >> > >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> > >> > >>> > >         at
> > >> > >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> > >> > >>> > >         ... 41 more
> > >> > >>> > > Caused by: java.io.IOException: Mkdirs failed to create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>> > >
> df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
> > >> > >>> > > mode="overwrite")
> > >> > >>> > >
> > >> > >>> > >
> > >> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in
> stage
> > >> 26.0
> > >> > >>> (TID
> > >> > >>> > > 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
> > >> failed to
> > >> > >>> > > create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in
> stage
> > >> 26.0
> > >> > >>> (TID
> > >> > >>> > > 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
> > >> failed to
> > >> > >>> > > create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in
> stage
> > >> 26.0
> > >> > >>> (TID
> > >> > >>> > > 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs
> > >> failed to
> > >> > >>> create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in
> stage
> > >> 26.0
> > >> > >>> (TID
> > >> > >>> > > 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
> > >> failed to
> > >> > >>> > > create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >> > >>> > >         at
> org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >> > >>> Source)
> > >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >> > >>> > >
> > >> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in
> stage
> > >> 26.0
> > >> > >>> (TID
> > >> > >>> > > 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
> > >> failed to
> > >> > >>> > > create
> > >> > >>> > >
> > >> > >>>
> > >>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552
> > >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >> > >>> > >         at
> > >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >> > >>> > >         at
> > >> > >>> > >
> > >> > >>>
> > >>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >> > >
> > >
> > > --
> > > Twitter: https://twitter.com/holdenkarau
> > > Books (Learning Spark, High Performance Spark, etc.):
> > > https://amzn.to/2MaRAG9  <https://amzn.to/2MaRAG9>
> > > YouTube Live Streams: https://www.youtube.com/user/holdenkarau
> > >
> >
>
> ---------------------------------------------------------------------
> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>
>

Re: Can���t write to PVC in K8S

Posted by Bj��rn J��rgensen <bj...@gmail.com>.
Well, I have tried almost everything the last 2 days now. 

There is no user spark, and whatever I do with the executor image it only runs for 2 minutes in k8s and then restarts. 


The problem seems to be the nogroup that is writing files from executors. 
drwxr-xr-x  2    185 nogroup    4096 Sep  2 18:43 test14


So is there anything that I can do with that? Or should I move on to minio or something else? 
I need to ETL 500 K - 94 GB of json files and save them somewhere. 

On 2021/08/31 21:09:25, Mich Talebzadeh <mi...@gmail.com> wrote: 
> I think Holden alluded to that.
> 
> In a nutshell, users in Linux can belong to more than one group. In this
> case you want to create a new group newgroup and add two users to that
> group.Do this in the docker file as USER 0
> 
> RUN groupadd newgroup
> ## Now add the two users (these users need to exist)
> RUN usermod -a -G newgroup jovyan
> RUN usermod -a -G newgroup spark
> ## set permission on the directory
> RUN chgrp -R newgroup /path/to/the/directory
> RUN chmod -R 770 /path/to/the/directory
> 
> Check this thread as well
> 
> https://superuser.com/questions/280994/give-write-permissions-to-multiple-users-on-a-folder-in-ubuntu
> 
> HTH
> 
> 
> 
>    view my Linkedin profile
> <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> 
> 
> 
> *Disclaimer:* Use it at your own risk. Any and all responsibility for any
> loss, damage or destruction of data or any other property which may arise
> from relying on this email's technical content is explicitly disclaimed.
> The author will in no case be liable for any monetary damages arising from
> such loss, damage or destruction.
> 
> 
> 
> 
> On Tue, 31 Aug 2021 at 20:50, Holden Karau <ho...@pigscanfly.ca> wrote:
> 
> > You can change the UID of one of them to match, or you could add them both
> > to a group and set permissions to 770.
> >
> > On Tue, Aug 31, 2021 at 12:18 PM Bjørn Jørgensen <bj...@gmail.com>
> > wrote:
> >
> >> Hi and thanks for all the good help.
> >>
> >> I will build jupyter on top of spark to be able to run jupyter in local
> >> mode with the new koalas library. The new koalas library can be imported as
> >> "from pyspark import pandas as ps".
> >>
> >> Then you can run spark on K8S the same way that you use pandas in a
> >> notebook.
> >>
> >> The easiest way to get a PV in K8S is with NFS. And with NFS you will
> >> find your files outside K8S without having to copy files out of a K8S PVC.
> >>
> >> With this setup I can use pandas code in a notebook with the power from a
> >> K8S cluster, as a normal notebook with pandas code.
> >> I hope that this project will be a easy way to convert from pandas to
> >> spark on K8S.
> >>
> >>
> >> I did some testing to day with file permission. Like  RUN mkdir -p
> >> /home/files and RUN chmod g+w /home/files
> >> But
> >>
> >> 185@myapp-38a8887b9cedae97-exec-1:~/work-dir$ id
> >> uid=185(185) gid=0(root) groups=0(root)
> >>
> >>
> >> jovyan@my-pyspark-notebook-f6d497958-t9rpk:~$ id
> >> uid=1000(jovyan) gid=100(users) groups=100(users)
> >>
> >> so it did't work.
> >>
> >> What will be the best way to make jovyan and 185 write to the same
> >> folder?
> >> On 2021/08/30 23:00:40, Mich Talebzadeh <mi...@gmail.com>
> >> wrote:
> >> > To be specific uid=185 (spark user, AKA anonymous) and root are in the
> >> same
> >> > group in the docker image itself
> >> >
> >> >
> >> > id
> >> >
> >> > uid=185(185) gid=0(root) groups=0(root)
> >> >
> >> >
> >> > So in the docker image conf file, you can create your permanent
> >> directory
> >> > as root off /home say
> >> >
> >> > do it as root (USER 0)
> >> >
> >> >
> >> > RUN mkdir -p /home/<MY-DIR>
> >> >
> >> > RUN chmod g+w /home/<MY-DIR>  ## give write permission to spark
> >> >
> >> >
> >> > ARG spark_uid=185
> >> > ..................
> >> >
> >> > # Specify the User that the actual main process will run as
> >> >
> >> > USER ${spark_uid}
> >> >
> >> >
> >> >    view my Linkedin profile
> >> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> >> >
> >> >
> >> >
> >> > *Disclaimer:* Use it at your own risk. Any and all responsibility for
> >> any
> >> > loss, damage or destruction of data or any other property which may
> >> arise
> >> > from relying on this email's technical content is explicitly disclaimed.
> >> > The author will in no case be liable for any monetary damages arising
> >> from
> >> > such loss, damage or destruction.
> >> >
> >> >
> >> >
> >> >
> >> > On Mon, 30 Aug 2021 at 22:26, Mich Talebzadeh <
> >> mich.talebzadeh@gmail.com>
> >> > wrote:
> >> >
> >> > > Forgot to mention that Spark uses that work directory to unzip the
> >> zipped
> >> > > files or gunzip archive files
> >> > >
> >> > > For example
> >> > >
> >> > > pyFiles
> >>  gs://axial-glow-224522-spark-on-k8s/codes/DSBQ.zip
> >> > >
> >> > >
> >> > > Spark will use that $SPARK_HOME/work-dir to unzip DSBQ.zip which is
> >> the
> >> > > application package here
> >> > >
> >> > >
> >> > > The alternative is to hack the docker file to create a directory for
> >> > > yourself
> >> > >
> >> > >
> >> > > RUN mkdir -p /home/conf
> >> > >
> >> > > RUN chmod g+w /home/conf
> >> > >
> >> > >
> >> > > HTH
> >> > >
> >> > >
> >> > > *Disclaimer:* Use it at your own risk. Any and all responsibility for
> >> any
> >> > > loss, damage or destruction of data or any other property which may
> >> arise
> >> > > from relying on this email's technical content is explicitly
> >> disclaimed.
> >> > > The author will in no case be liable for any monetary damages arising
> >> from
> >> > > such loss, damage or destruction.
> >> > >
> >> > >
> >> > >
> >> > >
> >> > >
> >> > >
> >> > > On Mon, 30 Aug 2021 at 22:13, Mich Talebzadeh <
> >> mich.talebzadeh@gmail.com>
> >> > > wrote:
> >> > >
> >> > >> I am not familiar with  jupyterlab  so cannot comment on that.
> >> > >>
> >> > >> However, once your parquet file is written to the work-dir, how are
> >> you
> >> > >> going to utilise it?
> >> > >>
> >> > >> HTH
> >> > >>
> >> > >>
> >> > >>
> >> > >>
> >> > >>    view my Linkedin profile
> >> > >> <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> >> > >>
> >> > >>
> >> > >>
> >> > >> *Disclaimer:* Use it at your own risk. Any and all responsibility for
> >> > >> any loss, damage or destruction of data or any other property which
> >> may
> >> > >> arise from relying on this email's technical content is explicitly
> >> > >> disclaimed. The author will in no case be liable for any monetary
> >> damages
> >> > >> arising from such loss, damage or destruction.
> >> > >>
> >> > >>
> >> > >>
> >> > >>
> >> > >> On Mon, 30 Aug 2021 at 22:05, Bjørn Jørgensen <
> >> bjornjorgensen@gmail.com>
> >> > >> wrote:
> >> > >>
> >> > >>> ok, so when I use spark on k8s I can only save files to s3 buckets
> >> or to
> >> > >>> a database?
> >> > >>>
> >> > >>> Note my setup, its spark with jupyterlab on top on k8s.
> >> > >>>
> >> > >>> What are those for if I cant write files from spark in k8s to disk?
> >> > >>>
> >> > >>>
> >> "spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> >> > >>> "False"
> >> > >>>
> >> "spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> >> > >>> "False"
> >> > >>>
> >> > >>> On 2021/08/30 20:50:22, Mich Talebzadeh <mi...@gmail.com>
> >> > >>> wrote:
> >> > >>> > Hi,
> >> > >>> >
> >> > >>> > You are trying to write to work-dir inside the docker and create
> >> > >>> > sub-directories:
> >> > >>> >
> >> > >>> > The error you are getting is this
> >> > >>> >
> >> > >>> > Mkdirs failed to create
> >> > >>> >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
> >> > >>> > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> >
> >> > >>> > That directory /work-dir is not recognised as a valid directory
> >> > >>> > for storage. It is not in HDFS or HCFS format
> >> > >>> >
> >> > >>> >
> >> > >>> > From Spark you can write to a bucket outside as a permanent
> >> storage.
> >> > >>> >
> >> > >>> > HTH
> >> > >>> >
> >> > >>> >
> >> > >>> >    view my Linkedin profile
> >> > >>> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> >> > >>> >
> >> > >>> >
> >> > >>> >
> >> > >>> > *Disclaimer:* Use it at your own risk. Any and all responsibility
> >> for
> >> > >>> any
> >> > >>> > loss, damage or destruction of data or any other property which
> >> may
> >> > >>> arise
> >> > >>> > from relying on this email's technical content is explicitly
> >> > >>> disclaimed.
> >> > >>> > The author will in no case be liable for any monetary damages
> >> arising
> >> > >>> from
> >> > >>> > such loss, damage or destruction.
> >> > >>> >
> >> > >>> >
> >> > >>> >
> >> > >>> >
> >> > >>> > On Mon, 30 Aug 2021 at 14:11, Bjørn Jørgensen <
> >> > >>> bjornjorgensen@gmail.com>
> >> > >>> > wrote:
> >> > >>> >
> >> > >>> > > Hi, I have built and running spark on k8s. A link to my repo
> >> > >>> > > https://github.com/bjornjorgensen/jlpyk8s
> >> > >>> > >
> >> > >>> > > Everything seems to be running fine, but I can’t save to PVC.
> >> > >>> > > If I convert the dataframe to pandas, then I can save it.
> >> > >>> > >
> >> > >>> > >
> >> > >>> > >
> >> > >>> > > from pyspark.sql import SparkSession
> >> > >>> > > spark = SparkSession.builder \
> >> > >>> > >     .master("k8s://
> >> https://kubernetes.default.svc.cluster.local:443")
> >> > >>> \
> >> > >>> > >     .config("spark.kubernetes.container.image",
> >> > >>> > > "bjornjorgensen/spark-py:v3.2-290821") \
> >> > >>> > >     .config("spark.kubernetes.authenticate.caCertFile",
> >> > >>> "/var/run/secrets/
> >> > >>> > > kubernetes.io/serviceaccount/ca.crt") \
> >> > >>> > >     .config("spark.kubernetes.authenticate.oauthTokenFile",
> >> > >>> > > "/var/run/secrets/kubernetes.io/serviceaccount/token") \
> >> > >>> > >
> >> > >>>  .config("spark.kubernetes.authenticate.driver.serviceAccountName",
> >> > >>> > > "my-pyspark-notebook") \
> >> > >>> > >     .config("spark.executor.instances", "10") \
> >> > >>> > >     .config("spark.driver.host",
> >> > >>> > > "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
> >> > >>> > >     .config("spark.driver.port", "29413") \
> >> > >>> > >
> >> > >>> > >
> >> > >>>
> >> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName",
> >> > >>> > > "nfs100") \
> >> > >>> > >
> >> > >>> > >
> >> > >>>
> >> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path",
> >> > >>> > > "/opt/spark/work-dir") \
> >> > >>> > >
> >> > >>> > >
> >> > >>>
> >> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName",
> >> > >>> > > "nfs100") \
> >> > >>> > >
> >> > >>> > >
> >> > >>>
> >> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path",
> >> > >>> > > "/opt/spark/work-dir") \
> >> > >>> > >
> >> > >>> > >
> >> > >>>
> >> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> >> > >>> > > "False") \
> >> > >>> > >
> >> > >>> > >
> >> > >>>
> >> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> >> > >>> > > "False") \
> >> > >>> > >     .appName("myApp") \
> >> > >>> > >     .config("spark.sql.repl.eagerEval.enabled", "True") \
> >> > >>> > >     .config("spark.driver.memory", "4g") \
> >> > >>> > >     .config("spark.executor.memory", "4g") \
> >> > >>> > >     .getOrCreate()
> >> > >>> > > sc = spark.sparkContext
> >> > >>> > >
> >> > >>> > > pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> >> > >>> > >
> >> > >>> > >
> >> > >>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for
> >> Window
> >> > >>> > > operation! Moving all data to a single partition, this can cause
> >> > >>> serious
> >> > >>> > > performance degradation.
> >> > >>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for
> >> Window
> >> > >>> > > operation! Moving all data to a single partition, this can cause
> >> > >>> serious
> >> > >>> > > performance degradation.
> >> > >>> > > 21/08/30 12:20:37 WARN WindowExec: No Partition Defined for
> >> Window
> >> > >>> > > operation! Moving all data to a single partition, this can cause
> >> > >>> serious
> >> > >>> > > performance degradation.
> >> > >>> > > 21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in stage
> >> 25.0
> >> > >>> (TID
> >> > >>> > > 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs
> >> failed to
> >> > >>> create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>> > > 21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in stage
> >> 25.0
> >> > >>> (TID
> >> > >>> > > 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
> >> failed to
> >> > >>> > > create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>> > > 21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in stage
> >> 25.0
> >> > >>> (TID
> >> > >>> > > 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs
> >> failed to
> >> > >>> > > create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>> > > 21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in stage
> >> 25.0
> >> > >>> (TID
> >> > >>> > > 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs
> >> failed
> >> > >>> to
> >> > >>> > > create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>> > > 21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0
> >> failed 4
> >> > >>> > > times; aborting job
> >> > >>> > > 21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job
> >> > >>> > > d98cdc60-bb44-4189-b483-8449fc793658.
> >> > >>> > > org.apache.spark.SparkException: Job aborted due to stage
> >> failure:
> >> > >>> Task 0
> >> > >>> > > in stage 25.0 failed 4 times, most recent failure: Lost task
> >> 0.3 in
> >> > >>> stage
> >> > >>> > > 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException:
> >> > >>> Mkdirs
> >> > >>> > > failed to create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>> > > Driver stacktrace:
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> >> > >>> > >         at
> >> > >>> > >
> >> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> >> > >>> > >         at scala.Option.foreach(Option.scala:407)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> >> > >>> > >         at
> >> > >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> >> > >>> > >         at
> >> > >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> >> > >>> > >         at
> >> > >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> >> > >>> > >
> >> > >>>
> >> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> >> > >>> > > Method)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >> > >>> > >         at
> >> java.base/java.lang.reflect.Method.invoke(Method.java:566)
> >> > >>> > >         at
> >> > >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> >> > >>> > >         at
> >> > >>> > >
> >> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> >> > >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
> >> > >>> > >         at
> >> > >>> > >
> >> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> >> > >>> > >         at
> >> py4j.commands.CallCommand.execute(CallCommand.java:79)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> >> > >>> > >         at
> >> > >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> >> > >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> >> > >>> > > Caused by: java.io.IOException: Mkdirs failed to create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>>
> >> ---------------------------------------------------------------------------
> >> > >>> > > Py4JJavaError                             Traceback (most recent
> >> > >>> call last)
> >> > >>> > > /tmp/ipykernel_80/163396320.py in <module>
> >> > >>> > > ----> 1
> >> > >>> pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> >> > >>> > >
> >> > >>> > > /opt/spark/python/pyspark/pandas/frame.py in to_parquet(self,
> >> path,
> >> > >>> mode,
> >> > >>> > > partition_cols, compression, index_col, **options)
> >> > >>> > >    4721         if compression is not None:
> >> > >>> > >    4722             builder.option("compression", compression)
> >> > >>> > > -> 4723
> >> > >>>  builder.options(**options).format("parquet").save(path)
> >> > >>> > >    4724
> >> > >>> > >    4725     def to_orc(
> >> > >>> > >
> >> > >>> > > /opt/spark/python/pyspark/sql/readwriter.py in save(self, path,
> >> > >>> format,
> >> > >>> > > mode, partitionBy, **options)
> >> > >>> > >     738             self._jwrite.save()
> >> > >>> > >     739         else:
> >> > >>> > > --> 740             self._jwrite.save(path)
> >> > >>> > >     741
> >> > >>> > >     742     @since(1.4)
> >> > >>> > >
> >> > >>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
> >> > >>> > > __call__(self, *args)
> >> > >>> > >    1307
> >> > >>> > >    1308         answer =
> >> self.gateway_client.send_command(command)
> >> > >>> > > -> 1309         return_value = get_return_value(
> >> > >>> > >    1310             answer, self.gateway_client, self.target_id,
> >> > >>> self.name
> >> > >>> > > )
> >> > >>> > >    1311
> >> > >>> > >
> >> > >>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
> >> > >>> > >     109     def deco(*a, **kw):
> >> > >>> > >     110         try:
> >> > >>> > > --> 111             return f(*a, **kw)
> >> > >>> > >     112         except py4j.protocol.Py4JJavaError as e:
> >> > >>> > >     113             converted =
> >> convert_exception(e.java_exception)
> >> > >>> > >
> >> > >>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
> >> > >>> > > get_return_value(answer, gateway_client, target_id, name)
> >> > >>> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
> >> > >>> > > gateway_client)
> >> > >>> > >     325             if answer[1] == REFERENCE_TYPE:
> >> > >>> > > --> 326                 raise Py4JJavaError(
> >> > >>> > >     327                     "An error occurred while calling
> >> > >>> {0}{1}{2}.\n".
> >> > >>> > >     328                     format(target_id, ".", name), value)
> >> > >>> > >
> >> > >>> > > Py4JJavaError: An error occurred while calling o4804.save.
> >> > >>> > > : org.apache.spark.SparkException: Job aborted.
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> >> > >>> > >         at
> >> > >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> >> > >>> > >
> >> > >>>
> >> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> >> > >>> > > Method)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >> > >>> > >         at
> >> java.base/java.lang.reflect.Method.invoke(Method.java:566)
> >> > >>> > >         at
> >> > >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> >> > >>> > >         at
> >> > >>> > >
> >> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> >> > >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
> >> > >>> > >         at
> >> > >>> > >
> >> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> >> > >>> > >         at
> >> py4j.commands.CallCommand.execute(CallCommand.java:79)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> >> > >>> > >         at
> >> > >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> >> > >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> >> > >>> > > Caused by: org.apache.spark.SparkException: Job aborted due to
> >> stage
> >> > >>> > > failure: Task 0 in stage 25.0 failed 4 times, most recent
> >> failure:
> >> > >>> Lost
> >> > >>> > > task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10):
> >> > >>> > > java.io.IOException: Mkdirs failed to create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>> > > Driver stacktrace:
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> >> > >>> > >         at
> >> > >>> > >
> >> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> >> > >>> > >         at scala.Option.foreach(Option.scala:407)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> >> > >>> > >         at
> >> > >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> >> > >>> > >         at
> >> > >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> >> > >>> > >         ... 41 more
> >> > >>> > > Caused by: java.io.IOException: Mkdirs failed to create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>> > >
> >> > >>> > >
> >> > >>> > >
> >> > >>> > > df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
> >> > >>> > > mode="overwrite")
> >> > >>> > >
> >> > >>> > >
> >> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in stage
> >> 26.0
> >> > >>> (TID
> >> > >>> > > 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
> >> failed to
> >> > >>> > > create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in stage
> >> 26.0
> >> > >>> (TID
> >> > >>> > > 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
> >> failed to
> >> > >>> > > create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in stage
> >> 26.0
> >> > >>> (TID
> >> > >>> > > 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs
> >> failed to
> >> > >>> create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in stage
> >> 26.0
> >> > >>> (TID
> >> > >>> > > 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
> >> failed to
> >> > >>> > > create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >> > >>> > >         at
> >> > >>> > >
> >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >> > >>> Source)
> >> > >>> > >         at
> >> > >>> > >
> >> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >> > >>> Source)
> >> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >> > >>> > >
> >> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in stage
> >> 26.0
> >> > >>> (TID
> >> > >>> > > 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
> >> failed to
> >> > >>> > > create
> >> > >>> > >
> >> > >>>
> >> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552
> >> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >> > >>> > >         at
> >> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >> > >>> > >         at
> >> > >>> > >
> >> > >>>
> >> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >> > >
> >
> > --
> > Twitter: https://twitter.com/holdenkarau
> > Books (Learning Spark, High Performance Spark, etc.):
> > https://amzn.to/2MaRAG9  <https://amzn.to/2MaRAG9>
> > YouTube Live Streams: https://www.youtube.com/user/holdenkarau
> >
> 

---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscribe@spark.apache.org


Re: Can’t write to PVC in K8S

Posted by Mich Talebzadeh <mi...@gmail.com>.
I think Holden alluded to that.

In a nutshell, users in Linux can belong to more than one group. In this
case you want to create a new group newgroup and add two users to that
group.Do this in the docker file as USER 0

RUN groupadd newgroup
## Now add the two users (these users need to exist)
RUN usermod -a -G newgroup jovyan
RUN usermod -a -G newgroup spark
## set permission on the directory
RUN chgrp -R newgroup /path/to/the/directory
RUN chmod -R 770 /path/to/the/directory

Check this thread as well

https://superuser.com/questions/280994/give-write-permissions-to-multiple-users-on-a-folder-in-ubuntu

HTH



   view my Linkedin profile
<https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>



*Disclaimer:* Use it at your own risk. Any and all responsibility for any
loss, damage or destruction of data or any other property which may arise
from relying on this email's technical content is explicitly disclaimed.
The author will in no case be liable for any monetary damages arising from
such loss, damage or destruction.




On Tue, 31 Aug 2021 at 20:50, Holden Karau <ho...@pigscanfly.ca> wrote:

> You can change the UID of one of them to match, or you could add them both
> to a group and set permissions to 770.
>
> On Tue, Aug 31, 2021 at 12:18 PM Bjørn Jørgensen <bj...@gmail.com>
> wrote:
>
>> Hi and thanks for all the good help.
>>
>> I will build jupyter on top of spark to be able to run jupyter in local
>> mode with the new koalas library. The new koalas library can be imported as
>> "from pyspark import pandas as ps".
>>
>> Then you can run spark on K8S the same way that you use pandas in a
>> notebook.
>>
>> The easiest way to get a PV in K8S is with NFS. And with NFS you will
>> find your files outside K8S without having to copy files out of a K8S PVC.
>>
>> With this setup I can use pandas code in a notebook with the power from a
>> K8S cluster, as a normal notebook with pandas code.
>> I hope that this project will be a easy way to convert from pandas to
>> spark on K8S.
>>
>>
>> I did some testing to day with file permission. Like  RUN mkdir -p
>> /home/files and RUN chmod g+w /home/files
>> But
>>
>> 185@myapp-38a8887b9cedae97-exec-1:~/work-dir$ id
>> uid=185(185) gid=0(root) groups=0(root)
>>
>>
>> jovyan@my-pyspark-notebook-f6d497958-t9rpk:~$ id
>> uid=1000(jovyan) gid=100(users) groups=100(users)
>>
>> so it did't work.
>>
>> What will be the best way to make jovyan and 185 write to the same
>> folder?
>> On 2021/08/30 23:00:40, Mich Talebzadeh <mi...@gmail.com>
>> wrote:
>> > To be specific uid=185 (spark user, AKA anonymous) and root are in the
>> same
>> > group in the docker image itself
>> >
>> >
>> > id
>> >
>> > uid=185(185) gid=0(root) groups=0(root)
>> >
>> >
>> > So in the docker image conf file, you can create your permanent
>> directory
>> > as root off /home say
>> >
>> > do it as root (USER 0)
>> >
>> >
>> > RUN mkdir -p /home/<MY-DIR>
>> >
>> > RUN chmod g+w /home/<MY-DIR>  ## give write permission to spark
>> >
>> >
>> > ARG spark_uid=185
>> > ..................
>> >
>> > # Specify the User that the actual main process will run as
>> >
>> > USER ${spark_uid}
>> >
>> >
>> >    view my Linkedin profile
>> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
>> >
>> >
>> >
>> > *Disclaimer:* Use it at your own risk. Any and all responsibility for
>> any
>> > loss, damage or destruction of data or any other property which may
>> arise
>> > from relying on this email's technical content is explicitly disclaimed.
>> > The author will in no case be liable for any monetary damages arising
>> from
>> > such loss, damage or destruction.
>> >
>> >
>> >
>> >
>> > On Mon, 30 Aug 2021 at 22:26, Mich Talebzadeh <
>> mich.talebzadeh@gmail.com>
>> > wrote:
>> >
>> > > Forgot to mention that Spark uses that work directory to unzip the
>> zipped
>> > > files or gunzip archive files
>> > >
>> > > For example
>> > >
>> > > pyFiles
>>  gs://axial-glow-224522-spark-on-k8s/codes/DSBQ.zip
>> > >
>> > >
>> > > Spark will use that $SPARK_HOME/work-dir to unzip DSBQ.zip which is
>> the
>> > > application package here
>> > >
>> > >
>> > > The alternative is to hack the docker file to create a directory for
>> > > yourself
>> > >
>> > >
>> > > RUN mkdir -p /home/conf
>> > >
>> > > RUN chmod g+w /home/conf
>> > >
>> > >
>> > > HTH
>> > >
>> > >
>> > > *Disclaimer:* Use it at your own risk. Any and all responsibility for
>> any
>> > > loss, damage or destruction of data or any other property which may
>> arise
>> > > from relying on this email's technical content is explicitly
>> disclaimed.
>> > > The author will in no case be liable for any monetary damages arising
>> from
>> > > such loss, damage or destruction.
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >
>> > > On Mon, 30 Aug 2021 at 22:13, Mich Talebzadeh <
>> mich.talebzadeh@gmail.com>
>> > > wrote:
>> > >
>> > >> I am not familiar with  jupyterlab  so cannot comment on that.
>> > >>
>> > >> However, once your parquet file is written to the work-dir, how are
>> you
>> > >> going to utilise it?
>> > >>
>> > >> HTH
>> > >>
>> > >>
>> > >>
>> > >>
>> > >>    view my Linkedin profile
>> > >> <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
>> > >>
>> > >>
>> > >>
>> > >> *Disclaimer:* Use it at your own risk. Any and all responsibility for
>> > >> any loss, damage or destruction of data or any other property which
>> may
>> > >> arise from relying on this email's technical content is explicitly
>> > >> disclaimed. The author will in no case be liable for any monetary
>> damages
>> > >> arising from such loss, damage or destruction.
>> > >>
>> > >>
>> > >>
>> > >>
>> > >> On Mon, 30 Aug 2021 at 22:05, Bjørn Jørgensen <
>> bjornjorgensen@gmail.com>
>> > >> wrote:
>> > >>
>> > >>> ok, so when I use spark on k8s I can only save files to s3 buckets
>> or to
>> > >>> a database?
>> > >>>
>> > >>> Note my setup, its spark with jupyterlab on top on k8s.
>> > >>>
>> > >>> What are those for if I cant write files from spark in k8s to disk?
>> > >>>
>> > >>>
>> "spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> > >>> "False"
>> > >>>
>> "spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> > >>> "False"
>> > >>>
>> > >>> On 2021/08/30 20:50:22, Mich Talebzadeh <mi...@gmail.com>
>> > >>> wrote:
>> > >>> > Hi,
>> > >>> >
>> > >>> > You are trying to write to work-dir inside the docker and create
>> > >>> > sub-directories:
>> > >>> >
>> > >>> > The error you are getting is this
>> > >>> >
>> > >>> > Mkdirs failed to create
>> > >>> >
>> > >>>
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
>> > >>> > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> >
>> > >>> > That directory /work-dir is not recognised as a valid directory
>> > >>> > for storage. It is not in HDFS or HCFS format
>> > >>> >
>> > >>> >
>> > >>> > From Spark you can write to a bucket outside as a permanent
>> storage.
>> > >>> >
>> > >>> > HTH
>> > >>> >
>> > >>> >
>> > >>> >    view my Linkedin profile
>> > >>> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
>> > >>> >
>> > >>> >
>> > >>> >
>> > >>> > *Disclaimer:* Use it at your own risk. Any and all responsibility
>> for
>> > >>> any
>> > >>> > loss, damage or destruction of data or any other property which
>> may
>> > >>> arise
>> > >>> > from relying on this email's technical content is explicitly
>> > >>> disclaimed.
>> > >>> > The author will in no case be liable for any monetary damages
>> arising
>> > >>> from
>> > >>> > such loss, damage or destruction.
>> > >>> >
>> > >>> >
>> > >>> >
>> > >>> >
>> > >>> > On Mon, 30 Aug 2021 at 14:11, Bjørn Jørgensen <
>> > >>> bjornjorgensen@gmail.com>
>> > >>> > wrote:
>> > >>> >
>> > >>> > > Hi, I have built and running spark on k8s. A link to my repo
>> > >>> > > https://github.com/bjornjorgensen/jlpyk8s
>> > >>> > >
>> > >>> > > Everything seems to be running fine, but I can’t save to PVC.
>> > >>> > > If I convert the dataframe to pandas, then I can save it.
>> > >>> > >
>> > >>> > >
>> > >>> > >
>> > >>> > > from pyspark.sql import SparkSession
>> > >>> > > spark = SparkSession.builder \
>> > >>> > >     .master("k8s://
>> https://kubernetes.default.svc.cluster.local:443")
>> > >>> \
>> > >>> > >     .config("spark.kubernetes.container.image",
>> > >>> > > "bjornjorgensen/spark-py:v3.2-290821") \
>> > >>> > >     .config("spark.kubernetes.authenticate.caCertFile",
>> > >>> "/var/run/secrets/
>> > >>> > > kubernetes.io/serviceaccount/ca.crt") \
>> > >>> > >     .config("spark.kubernetes.authenticate.oauthTokenFile",
>> > >>> > > "/var/run/secrets/kubernetes.io/serviceaccount/token") \
>> > >>> > >
>> > >>>  .config("spark.kubernetes.authenticate.driver.serviceAccountName",
>> > >>> > > "my-pyspark-notebook") \
>> > >>> > >     .config("spark.executor.instances", "10") \
>> > >>> > >     .config("spark.driver.host",
>> > >>> > > "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
>> > >>> > >     .config("spark.driver.port", "29413") \
>> > >>> > >
>> > >>> > >
>> > >>>
>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName",
>> > >>> > > "nfs100") \
>> > >>> > >
>> > >>> > >
>> > >>>
>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path",
>> > >>> > > "/opt/spark/work-dir") \
>> > >>> > >
>> > >>> > >
>> > >>>
>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName",
>> > >>> > > "nfs100") \
>> > >>> > >
>> > >>> > >
>> > >>>
>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path",
>> > >>> > > "/opt/spark/work-dir") \
>> > >>> > >
>> > >>> > >
>> > >>>
>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> > >>> > > "False") \
>> > >>> > >
>> > >>> > >
>> > >>>
>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> > >>> > > "False") \
>> > >>> > >     .appName("myApp") \
>> > >>> > >     .config("spark.sql.repl.eagerEval.enabled", "True") \
>> > >>> > >     .config("spark.driver.memory", "4g") \
>> > >>> > >     .config("spark.executor.memory", "4g") \
>> > >>> > >     .getOrCreate()
>> > >>> > > sc = spark.sparkContext
>> > >>> > >
>> > >>> > > pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
>> > >>> > >
>> > >>> > >
>> > >>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for
>> Window
>> > >>> > > operation! Moving all data to a single partition, this can cause
>> > >>> serious
>> > >>> > > performance degradation.
>> > >>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for
>> Window
>> > >>> > > operation! Moving all data to a single partition, this can cause
>> > >>> serious
>> > >>> > > performance degradation.
>> > >>> > > 21/08/30 12:20:37 WARN WindowExec: No Partition Defined for
>> Window
>> > >>> > > operation! Moving all data to a single partition, this can cause
>> > >>> serious
>> > >>> > > performance degradation.
>> > >>> > > 21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in stage
>> 25.0
>> > >>> (TID
>> > >>> > > 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs
>> failed to
>> > >>> create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>> > > 21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in stage
>> 25.0
>> > >>> (TID
>> > >>> > > 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
>> failed to
>> > >>> > > create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>> > > 21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in stage
>> 25.0
>> > >>> (TID
>> > >>> > > 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs
>> failed to
>> > >>> > > create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>> > > 21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in stage
>> 25.0
>> > >>> (TID
>> > >>> > > 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs
>> failed
>> > >>> to
>> > >>> > > create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>> > > 21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0
>> failed 4
>> > >>> > > times; aborting job
>> > >>> > > 21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job
>> > >>> > > d98cdc60-bb44-4189-b483-8449fc793658.
>> > >>> > > org.apache.spark.SparkException: Job aborted due to stage
>> failure:
>> > >>> Task 0
>> > >>> > > in stage 25.0 failed 4 times, most recent failure: Lost task
>> 0.3 in
>> > >>> stage
>> > >>> > > 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException:
>> > >>> Mkdirs
>> > >>> > > failed to create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>> > > Driver stacktrace:
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>> > >>> > >         at
>> > >>> > >
>> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>> > >>> > >         at scala.Option.foreach(Option.scala:407)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>> > >>> > >         at
>> > >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>> > >>> > >         at
>> > >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>> > >>> > >         at
>> > >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>> > >>> > >
>> > >>>
>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> > >>> > > Method)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> > >>> > >         at
>> java.base/java.lang.reflect.Method.invoke(Method.java:566)
>> > >>> > >         at
>> > >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> > >>> > >         at
>> > >>> > >
>> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> > >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>> > >>> > >         at
>> > >>> > >
>> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> > >>> > >         at
>> py4j.commands.CallCommand.execute(CallCommand.java:79)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>> > >>> > >         at
>> > >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>> > >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>> > >>> > > Caused by: java.io.IOException: Mkdirs failed to create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>>
>> ---------------------------------------------------------------------------
>> > >>> > > Py4JJavaError                             Traceback (most recent
>> > >>> call last)
>> > >>> > > /tmp/ipykernel_80/163396320.py in <module>
>> > >>> > > ----> 1
>> > >>> pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
>> > >>> > >
>> > >>> > > /opt/spark/python/pyspark/pandas/frame.py in to_parquet(self,
>> path,
>> > >>> mode,
>> > >>> > > partition_cols, compression, index_col, **options)
>> > >>> > >    4721         if compression is not None:
>> > >>> > >    4722             builder.option("compression", compression)
>> > >>> > > -> 4723
>> > >>>  builder.options(**options).format("parquet").save(path)
>> > >>> > >    4724
>> > >>> > >    4725     def to_orc(
>> > >>> > >
>> > >>> > > /opt/spark/python/pyspark/sql/readwriter.py in save(self, path,
>> > >>> format,
>> > >>> > > mode, partitionBy, **options)
>> > >>> > >     738             self._jwrite.save()
>> > >>> > >     739         else:
>> > >>> > > --> 740             self._jwrite.save(path)
>> > >>> > >     741
>> > >>> > >     742     @since(1.4)
>> > >>> > >
>> > >>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
>> > >>> > > __call__(self, *args)
>> > >>> > >    1307
>> > >>> > >    1308         answer =
>> self.gateway_client.send_command(command)
>> > >>> > > -> 1309         return_value = get_return_value(
>> > >>> > >    1310             answer, self.gateway_client, self.target_id,
>> > >>> self.name
>> > >>> > > )
>> > >>> > >    1311
>> > >>> > >
>> > >>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
>> > >>> > >     109     def deco(*a, **kw):
>> > >>> > >     110         try:
>> > >>> > > --> 111             return f(*a, **kw)
>> > >>> > >     112         except py4j.protocol.Py4JJavaError as e:
>> > >>> > >     113             converted =
>> convert_exception(e.java_exception)
>> > >>> > >
>> > >>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
>> > >>> > > get_return_value(answer, gateway_client, target_id, name)
>> > >>> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
>> > >>> > > gateway_client)
>> > >>> > >     325             if answer[1] == REFERENCE_TYPE:
>> > >>> > > --> 326                 raise Py4JJavaError(
>> > >>> > >     327                     "An error occurred while calling
>> > >>> {0}{1}{2}.\n".
>> > >>> > >     328                     format(target_id, ".", name), value)
>> > >>> > >
>> > >>> > > Py4JJavaError: An error occurred while calling o4804.save.
>> > >>> > > : org.apache.spark.SparkException: Job aborted.
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>> > >>> > >         at
>> > >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>> > >>> > >
>> > >>>
>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> > >>> > > Method)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> > >>> > >         at
>> java.base/java.lang.reflect.Method.invoke(Method.java:566)
>> > >>> > >         at
>> > >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> > >>> > >         at
>> > >>> > >
>> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> > >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>> > >>> > >         at
>> > >>> > >
>> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> > >>> > >         at
>> py4j.commands.CallCommand.execute(CallCommand.java:79)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>> > >>> > >         at
>> > >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>> > >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>> > >>> > > Caused by: org.apache.spark.SparkException: Job aborted due to
>> stage
>> > >>> > > failure: Task 0 in stage 25.0 failed 4 times, most recent
>> failure:
>> > >>> Lost
>> > >>> > > task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10):
>> > >>> > > java.io.IOException: Mkdirs failed to create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>> > > Driver stacktrace:
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>> > >>> > >         at
>> > >>> > >
>> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>> > >>> > >         at scala.Option.foreach(Option.scala:407)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>> > >>> > >         at
>> > >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>> > >>> > >         at
>> > >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>> > >>> > >         ... 41 more
>> > >>> > > Caused by: java.io.IOException: Mkdirs failed to create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>> > >
>> > >>> > >
>> > >>> > >
>> > >>> > > df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
>> > >>> > > mode="overwrite")
>> > >>> > >
>> > >>> > >
>> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in stage
>> 26.0
>> > >>> (TID
>> > >>> > > 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
>> failed to
>> > >>> > > create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in stage
>> 26.0
>> > >>> (TID
>> > >>> > > 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
>> failed to
>> > >>> > > create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in stage
>> 26.0
>> > >>> (TID
>> > >>> > > 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs
>> failed to
>> > >>> create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in stage
>> 26.0
>> > >>> (TID
>> > >>> > > 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
>> failed to
>> > >>> > > create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >>> > >         at
>> > >>> > >
>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> > >>> Source)
>> > >>> > >         at
>> > >>> > >
>> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> > >>> Source)
>> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >>> > >
>> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in stage
>> 26.0
>> > >>> (TID
>> > >>> > > 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
>> failed to
>> > >>> > > create
>> > >>> > >
>> > >>>
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552
>> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >>> > >         at
>> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >>> > >         at
>> > >>> > >
>> > >>>
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >
>
> --
> Twitter: https://twitter.com/holdenkarau
> Books (Learning Spark, High Performance Spark, etc.):
> https://amzn.to/2MaRAG9  <https://amzn.to/2MaRAG9>
> YouTube Live Streams: https://www.youtube.com/user/holdenkarau
>

Re: Can’t write to PVC in K8S

Posted by Holden Karau <ho...@pigscanfly.ca>.
You can change the UID of one of them to match, or you could add them both
to a group and set permissions to 770.

On Tue, Aug 31, 2021 at 12:18 PM Bjørn Jørgensen <bj...@gmail.com>
wrote:

> Hi and thanks for all the good help.
>
> I will build jupyter on top of spark to be able to run jupyter in local
> mode with the new koalas library. The new koalas library can be imported as
> "from pyspark import pandas as ps".
>
> Then you can run spark on K8S the same way that you use pandas in a
> notebook.
>
> The easiest way to get a PV in K8S is with NFS. And with NFS you will find
> your files outside K8S without having to copy files out of a K8S PVC.
>
> With this setup I can use pandas code in a notebook with the power from a
> K8S cluster, as a normal notebook with pandas code.
> I hope that this project will be a easy way to convert from pandas to
> spark on K8S.
>
>
> I did some testing to day with file permission. Like  RUN mkdir -p
> /home/files and RUN chmod g+w /home/files
> But
>
> 185@myapp-38a8887b9cedae97-exec-1:~/work-dir$ id
> uid=185(185) gid=0(root) groups=0(root)
>
>
> jovyan@my-pyspark-notebook-f6d497958-t9rpk:~$ id
> uid=1000(jovyan) gid=100(users) groups=100(users)
>
> so it did't work.
>
> What will be the best way to make jovyan and 185 write to the same folder?
> On 2021/08/30 23:00:40, Mich Talebzadeh <mi...@gmail.com>
> wrote:
> > To be specific uid=185 (spark user, AKA anonymous) and root are in the
> same
> > group in the docker image itself
> >
> >
> > id
> >
> > uid=185(185) gid=0(root) groups=0(root)
> >
> >
> > So in the docker image conf file, you can create your permanent directory
> > as root off /home say
> >
> > do it as root (USER 0)
> >
> >
> > RUN mkdir -p /home/<MY-DIR>
> >
> > RUN chmod g+w /home/<MY-DIR>  ## give write permission to spark
> >
> >
> > ARG spark_uid=185
> > ..................
> >
> > # Specify the User that the actual main process will run as
> >
> > USER ${spark_uid}
> >
> >
> >    view my Linkedin profile
> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> >
> >
> >
> > *Disclaimer:* Use it at your own risk. Any and all responsibility for any
> > loss, damage or destruction of data or any other property which may arise
> > from relying on this email's technical content is explicitly disclaimed.
> > The author will in no case be liable for any monetary damages arising
> from
> > such loss, damage or destruction.
> >
> >
> >
> >
> > On Mon, 30 Aug 2021 at 22:26, Mich Talebzadeh <mich.talebzadeh@gmail.com
> >
> > wrote:
> >
> > > Forgot to mention that Spark uses that work directory to unzip the
> zipped
> > > files or gunzip archive files
> > >
> > > For example
> > >
> > > pyFiles
>  gs://axial-glow-224522-spark-on-k8s/codes/DSBQ.zip
> > >
> > >
> > > Spark will use that $SPARK_HOME/work-dir to unzip DSBQ.zip which is the
> > > application package here
> > >
> > >
> > > The alternative is to hack the docker file to create a directory for
> > > yourself
> > >
> > >
> > > RUN mkdir -p /home/conf
> > >
> > > RUN chmod g+w /home/conf
> > >
> > >
> > > HTH
> > >
> > >
> > > *Disclaimer:* Use it at your own risk. Any and all responsibility for
> any
> > > loss, damage or destruction of data or any other property which may
> arise
> > > from relying on this email's technical content is explicitly
> disclaimed.
> > > The author will in no case be liable for any monetary damages arising
> from
> > > such loss, damage or destruction.
> > >
> > >
> > >
> > >
> > >
> > >
> > > On Mon, 30 Aug 2021 at 22:13, Mich Talebzadeh <
> mich.talebzadeh@gmail.com>
> > > wrote:
> > >
> > >> I am not familiar with  jupyterlab  so cannot comment on that.
> > >>
> > >> However, once your parquet file is written to the work-dir, how are
> you
> > >> going to utilise it?
> > >>
> > >> HTH
> > >>
> > >>
> > >>
> > >>
> > >>    view my Linkedin profile
> > >> <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> > >>
> > >>
> > >>
> > >> *Disclaimer:* Use it at your own risk. Any and all responsibility for
> > >> any loss, damage or destruction of data or any other property which
> may
> > >> arise from relying on this email's technical content is explicitly
> > >> disclaimed. The author will in no case be liable for any monetary
> damages
> > >> arising from such loss, damage or destruction.
> > >>
> > >>
> > >>
> > >>
> > >> On Mon, 30 Aug 2021 at 22:05, Bjørn Jørgensen <
> bjornjorgensen@gmail.com>
> > >> wrote:
> > >>
> > >>> ok, so when I use spark on k8s I can only save files to s3 buckets
> or to
> > >>> a database?
> > >>>
> > >>> Note my setup, its spark with jupyterlab on top on k8s.
> > >>>
> > >>> What are those for if I cant write files from spark in k8s to disk?
> > >>>
> > >>>
> "spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > >>> "False"
> > >>>
> "spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > >>> "False"
> > >>>
> > >>> On 2021/08/30 20:50:22, Mich Talebzadeh <mi...@gmail.com>
> > >>> wrote:
> > >>> > Hi,
> > >>> >
> > >>> > You are trying to write to work-dir inside the docker and create
> > >>> > sub-directories:
> > >>> >
> > >>> > The error you are getting is this
> > >>> >
> > >>> > Mkdirs failed to create
> > >>> >
> > >>>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
> > >>> > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> >
> > >>> > That directory /work-dir is not recognised as a valid directory
> > >>> > for storage. It is not in HDFS or HCFS format
> > >>> >
> > >>> >
> > >>> > From Spark you can write to a bucket outside as a permanent
> storage.
> > >>> >
> > >>> > HTH
> > >>> >
> > >>> >
> > >>> >    view my Linkedin profile
> > >>> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> > >>> >
> > >>> >
> > >>> >
> > >>> > *Disclaimer:* Use it at your own risk. Any and all responsibility
> for
> > >>> any
> > >>> > loss, damage or destruction of data or any other property which may
> > >>> arise
> > >>> > from relying on this email's technical content is explicitly
> > >>> disclaimed.
> > >>> > The author will in no case be liable for any monetary damages
> arising
> > >>> from
> > >>> > such loss, damage or destruction.
> > >>> >
> > >>> >
> > >>> >
> > >>> >
> > >>> > On Mon, 30 Aug 2021 at 14:11, Bjørn Jørgensen <
> > >>> bjornjorgensen@gmail.com>
> > >>> > wrote:
> > >>> >
> > >>> > > Hi, I have built and running spark on k8s. A link to my repo
> > >>> > > https://github.com/bjornjorgensen/jlpyk8s
> > >>> > >
> > >>> > > Everything seems to be running fine, but I can’t save to PVC.
> > >>> > > If I convert the dataframe to pandas, then I can save it.
> > >>> > >
> > >>> > >
> > >>> > >
> > >>> > > from pyspark.sql import SparkSession
> > >>> > > spark = SparkSession.builder \
> > >>> > >     .master("k8s://
> https://kubernetes.default.svc.cluster.local:443")
> > >>> \
> > >>> > >     .config("spark.kubernetes.container.image",
> > >>> > > "bjornjorgensen/spark-py:v3.2-290821") \
> > >>> > >     .config("spark.kubernetes.authenticate.caCertFile",
> > >>> "/var/run/secrets/
> > >>> > > kubernetes.io/serviceaccount/ca.crt") \
> > >>> > >     .config("spark.kubernetes.authenticate.oauthTokenFile",
> > >>> > > "/var/run/secrets/kubernetes.io/serviceaccount/token") \
> > >>> > >
> > >>>  .config("spark.kubernetes.authenticate.driver.serviceAccountName",
> > >>> > > "my-pyspark-notebook") \
> > >>> > >     .config("spark.executor.instances", "10") \
> > >>> > >     .config("spark.driver.host",
> > >>> > > "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
> > >>> > >     .config("spark.driver.port", "29413") \
> > >>> > >
> > >>> > >
> > >>>
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName",
> > >>> > > "nfs100") \
> > >>> > >
> > >>> > >
> > >>>
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path",
> > >>> > > "/opt/spark/work-dir") \
> > >>> > >
> > >>> > >
> > >>>
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName",
> > >>> > > "nfs100") \
> > >>> > >
> > >>> > >
> > >>>
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path",
> > >>> > > "/opt/spark/work-dir") \
> > >>> > >
> > >>> > >
> > >>>
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > >>> > > "False") \
> > >>> > >
> > >>> > >
> > >>>
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > >>> > > "False") \
> > >>> > >     .appName("myApp") \
> > >>> > >     .config("spark.sql.repl.eagerEval.enabled", "True") \
> > >>> > >     .config("spark.driver.memory", "4g") \
> > >>> > >     .config("spark.executor.memory", "4g") \
> > >>> > >     .getOrCreate()
> > >>> > > sc = spark.sparkContext
> > >>> > >
> > >>> > > pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> > >>> > >
> > >>> > >
> > >>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for
> Window
> > >>> > > operation! Moving all data to a single partition, this can cause
> > >>> serious
> > >>> > > performance degradation.
> > >>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for
> Window
> > >>> > > operation! Moving all data to a single partition, this can cause
> > >>> serious
> > >>> > > performance degradation.
> > >>> > > 21/08/30 12:20:37 WARN WindowExec: No Partition Defined for
> Window
> > >>> > > operation! Moving all data to a single partition, this can cause
> > >>> serious
> > >>> > > performance degradation.
> > >>> > > 21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in stage
> 25.0
> > >>> (TID
> > >>> > > 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs
> failed to
> > >>> create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>> > > 21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in stage
> 25.0
> > >>> (TID
> > >>> > > 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
> failed to
> > >>> > > create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>> > > 21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in stage
> 25.0
> > >>> (TID
> > >>> > > 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs
> failed to
> > >>> > > create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>> > > 21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in stage
> 25.0
> > >>> (TID
> > >>> > > 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs
> failed
> > >>> to
> > >>> > > create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>> > > 21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0
> failed 4
> > >>> > > times; aborting job
> > >>> > > 21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job
> > >>> > > d98cdc60-bb44-4189-b483-8449fc793658.
> > >>> > > org.apache.spark.SparkException: Job aborted due to stage
> failure:
> > >>> Task 0
> > >>> > > in stage 25.0 failed 4 times, most recent failure: Lost task 0.3
> in
> > >>> stage
> > >>> > > 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException:
> > >>> Mkdirs
> > >>> > > failed to create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>> > > Driver stacktrace:
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> > >>> > >         at
> > >>> > >
> > >>>
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> > >>> > >         at
> > >>> > >
> > >>>
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> > >>> > >         at
> > >>> > >
> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> > >>> > >         at scala.Option.foreach(Option.scala:407)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> > >>> > >         at
> > >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> > >>> > >         at
> > >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> > >>> > >         at
> > >>> > >
> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> > >>> > >         at
> > >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > >>> > >
> > >>>
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> > >>> > >         at
> > >>> > >
> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> > >>> > >         at
> > >>> > >
> > >>>
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > >>> > > Method)
> > >>> > >         at
> > >>> > >
> > >>>
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> > >>> > >         at
> > >>> > >
> > >>>
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> > >>> > >         at
> java.base/java.lang.reflect.Method.invoke(Method.java:566)
> > >>> > >         at
> > >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> > >>> > >         at
> > >>> > >
> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> > >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
> > >>> > >         at
> > >>> > >
> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> > >>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> > >>> > >         at
> > >>> > >
> > >>>
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> > >>> > >         at
> > >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> > >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> > >>> > > Caused by: java.io.IOException: Mkdirs failed to create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>>
> ---------------------------------------------------------------------------
> > >>> > > Py4JJavaError                             Traceback (most recent
> > >>> call last)
> > >>> > > /tmp/ipykernel_80/163396320.py in <module>
> > >>> > > ----> 1
> > >>> pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> > >>> > >
> > >>> > > /opt/spark/python/pyspark/pandas/frame.py in to_parquet(self,
> path,
> > >>> mode,
> > >>> > > partition_cols, compression, index_col, **options)
> > >>> > >    4721         if compression is not None:
> > >>> > >    4722             builder.option("compression", compression)
> > >>> > > -> 4723
> > >>>  builder.options(**options).format("parquet").save(path)
> > >>> > >    4724
> > >>> > >    4725     def to_orc(
> > >>> > >
> > >>> > > /opt/spark/python/pyspark/sql/readwriter.py in save(self, path,
> > >>> format,
> > >>> > > mode, partitionBy, **options)
> > >>> > >     738             self._jwrite.save()
> > >>> > >     739         else:
> > >>> > > --> 740             self._jwrite.save(path)
> > >>> > >     741
> > >>> > >     742     @since(1.4)
> > >>> > >
> > >>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
> > >>> > > __call__(self, *args)
> > >>> > >    1307
> > >>> > >    1308         answer =
> self.gateway_client.send_command(command)
> > >>> > > -> 1309         return_value = get_return_value(
> > >>> > >    1310             answer, self.gateway_client, self.target_id,
> > >>> self.name
> > >>> > > )
> > >>> > >    1311
> > >>> > >
> > >>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
> > >>> > >     109     def deco(*a, **kw):
> > >>> > >     110         try:
> > >>> > > --> 111             return f(*a, **kw)
> > >>> > >     112         except py4j.protocol.Py4JJavaError as e:
> > >>> > >     113             converted =
> convert_exception(e.java_exception)
> > >>> > >
> > >>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
> > >>> > > get_return_value(answer, gateway_client, target_id, name)
> > >>> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
> > >>> > > gateway_client)
> > >>> > >     325             if answer[1] == REFERENCE_TYPE:
> > >>> > > --> 326                 raise Py4JJavaError(
> > >>> > >     327                     "An error occurred while calling
> > >>> {0}{1}{2}.\n".
> > >>> > >     328                     format(target_id, ".", name), value)
> > >>> > >
> > >>> > > Py4JJavaError: An error occurred while calling o4804.save.
> > >>> > > : org.apache.spark.SparkException: Job aborted.
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> > >>> > >         at
> > >>> > >
> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> > >>> > >         at
> > >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > >>> > >
> > >>>
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> > >>> > >         at
> > >>> > >
> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> > >>> > >         at
> > >>> > >
> > >>>
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > >>> > > Method)
> > >>> > >         at
> > >>> > >
> > >>>
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> > >>> > >         at
> > >>> > >
> > >>>
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> > >>> > >         at
> java.base/java.lang.reflect.Method.invoke(Method.java:566)
> > >>> > >         at
> > >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> > >>> > >         at
> > >>> > >
> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> > >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
> > >>> > >         at
> > >>> > >
> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> > >>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> > >>> > >         at
> > >>> > >
> > >>>
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> > >>> > >         at
> > >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> > >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> > >>> > > Caused by: org.apache.spark.SparkException: Job aborted due to
> stage
> > >>> > > failure: Task 0 in stage 25.0 failed 4 times, most recent
> failure:
> > >>> Lost
> > >>> > > task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10):
> > >>> > > java.io.IOException: Mkdirs failed to create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>> > > Driver stacktrace:
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> > >>> > >         at
> > >>> > >
> > >>>
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> > >>> > >         at
> > >>> > >
> > >>>
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> > >>> > >         at
> > >>> > >
> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> > >>> > >         at scala.Option.foreach(Option.scala:407)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> > >>> > >         at
> > >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> > >>> > >         at
> > >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> > >>> > >         ... 41 more
> > >>> > > Caused by: java.io.IOException: Mkdirs failed to create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>> > >
> > >>> > >
> > >>> > >
> > >>> > > df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
> > >>> > > mode="overwrite")
> > >>> > >
> > >>> > >
> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in stage
> 26.0
> > >>> (TID
> > >>> > > 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
> failed to
> > >>> > > create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in stage
> 26.0
> > >>> (TID
> > >>> > > 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
> failed to
> > >>> > > create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in stage
> 26.0
> > >>> (TID
> > >>> > > 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs
> failed to
> > >>> create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in stage
> 26.0
> > >>> (TID
> > >>> > > 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
> failed to
> > >>> > > create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >>> > >         at
> > >>> > >
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >>> > >         at
> > >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >>> > >         at
> > >>> > >
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> > >>> Source)
> > >>> > >         at
> > >>> > >
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> > >>> Source)
> > >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >>> > >
> > >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in stage
> 26.0
> > >>> (TID
> > >>> > > 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs
> failed to
> > >>> > > create
> > >>> > >
> > >>>
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552
> > >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >>> > >         at
> > >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >>> > >         at
> > >>> > >
> > >>>
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >

-- 
Twitter: https://twitter.com/holdenkarau
Books (Learning Spark, High Performance Spark, etc.):
https://amzn.to/2MaRAG9  <https://amzn.to/2MaRAG9>
YouTube Live Streams: https://www.youtube.com/user/holdenkarau

Re: Can���t write to PVC in K8S

Posted by Bj��rn J��rgensen <bj...@gmail.com>.
Hi and thanks for all the good help. 

I will build jupyter on top of spark to be able to run jupyter in local mode with the new koalas library. The new koalas library can be imported as "from pyspark import pandas as ps".  

Then you can run spark on K8S the same way that you use pandas in a notebook. 

The easiest way to get a PV in K8S is with NFS. And with NFS you will find your files outside K8S without having to copy files out of a K8S PVC.

With this setup I can use pandas code in a notebook with the power from a K8S cluster, as a normal notebook with pandas code.
I hope that this project will be a easy way to convert from pandas to spark on K8S.


I did some testing to day with file permission. Like  RUN mkdir -p /home/files and RUN chmod g+w /home/files 
But 

185@myapp-38a8887b9cedae97-exec-1:~/work-dir$ id
uid=185(185) gid=0(root) groups=0(root)


jovyan@my-pyspark-notebook-f6d497958-t9rpk:~$ id
uid=1000(jovyan) gid=100(users) groups=100(users)

so it did't work.

What will be the best way to make jovyan and 185 write to the same folder? 
On 2021/08/30 23:00:40, Mich Talebzadeh <mi...@gmail.com> wrote: 
> To be specific uid=185 (spark user, AKA anonymous) and root are in the same
> group in the docker image itself
> 
> 
> id
> 
> uid=185(185) gid=0(root) groups=0(root)
> 
> 
> So in the docker image conf file, you can create your permanent directory
> as root off /home say
> 
> do it as root (USER 0)
> 
> 
> RUN mkdir -p /home/<MY-DIR>
> 
> RUN chmod g+w /home/<MY-DIR>  ## give write permission to spark
> 
> 
> ARG spark_uid=185
> ..................
> 
> # Specify the User that the actual main process will run as
> 
> USER ${spark_uid}
> 
> 
>    view my Linkedin profile
> <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> 
> 
> 
> *Disclaimer:* Use it at your own risk. Any and all responsibility for any
> loss, damage or destruction of data or any other property which may arise
> from relying on this email's technical content is explicitly disclaimed.
> The author will in no case be liable for any monetary damages arising from
> such loss, damage or destruction.
> 
> 
> 
> 
> On Mon, 30 Aug 2021 at 22:26, Mich Talebzadeh <mi...@gmail.com>
> wrote:
> 
> > Forgot to mention that Spark uses that work directory to unzip the zipped
> > files or gunzip archive files
> >
> > For example
> >
> > pyFiles                 gs://axial-glow-224522-spark-on-k8s/codes/DSBQ.zip
> >
> >
> > Spark will use that $SPARK_HOME/work-dir to unzip DSBQ.zip which is the
> > application package here
> >
> >
> > The alternative is to hack the docker file to create a directory for
> > yourself
> >
> >
> > RUN mkdir -p /home/conf
> >
> > RUN chmod g+w /home/conf
> >
> >
> > HTH
> >
> >
> > *Disclaimer:* Use it at your own risk. Any and all responsibility for any
> > loss, damage or destruction of data or any other property which may arise
> > from relying on this email's technical content is explicitly disclaimed.
> > The author will in no case be liable for any monetary damages arising from
> > such loss, damage or destruction.
> >
> >
> >
> >
> >
> >
> > On Mon, 30 Aug 2021 at 22:13, Mich Talebzadeh <mi...@gmail.com>
> > wrote:
> >
> >> I am not familiar with  jupyterlab  so cannot comment on that.
> >>
> >> However, once your parquet file is written to the work-dir, how are you
> >> going to utilise it?
> >>
> >> HTH
> >>
> >>
> >>
> >>
> >>    view my Linkedin profile
> >> <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> >>
> >>
> >>
> >> *Disclaimer:* Use it at your own risk. Any and all responsibility for
> >> any loss, damage or destruction of data or any other property which may
> >> arise from relying on this email's technical content is explicitly
> >> disclaimed. The author will in no case be liable for any monetary damages
> >> arising from such loss, damage or destruction.
> >>
> >>
> >>
> >>
> >> On Mon, 30 Aug 2021 at 22:05, Bjørn Jørgensen <bj...@gmail.com>
> >> wrote:
> >>
> >>> ok, so when I use spark on k8s I can only save files to s3 buckets or to
> >>> a database?
> >>>
> >>> Note my setup, its spark with jupyterlab on top on k8s.
> >>>
> >>> What are those for if I cant write files from spark in k8s to disk?
> >>>
> >>> "spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> >>> "False"
> >>> "spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> >>> "False"
> >>>
> >>> On 2021/08/30 20:50:22, Mich Talebzadeh <mi...@gmail.com>
> >>> wrote:
> >>> > Hi,
> >>> >
> >>> > You are trying to write to work-dir inside the docker and create
> >>> > sub-directories:
> >>> >
> >>> > The error you are getting is this
> >>> >
> >>> > Mkdirs failed to create
> >>> >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
> >>> > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> >
> >>> > That directory /work-dir is not recognised as a valid directory
> >>> > for storage. It is not in HDFS or HCFS format
> >>> >
> >>> >
> >>> > From Spark you can write to a bucket outside as a permanent storage.
> >>> >
> >>> > HTH
> >>> >
> >>> >
> >>> >    view my Linkedin profile
> >>> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> >>> >
> >>> >
> >>> >
> >>> > *Disclaimer:* Use it at your own risk. Any and all responsibility for
> >>> any
> >>> > loss, damage or destruction of data or any other property which may
> >>> arise
> >>> > from relying on this email's technical content is explicitly
> >>> disclaimed.
> >>> > The author will in no case be liable for any monetary damages arising
> >>> from
> >>> > such loss, damage or destruction.
> >>> >
> >>> >
> >>> >
> >>> >
> >>> > On Mon, 30 Aug 2021 at 14:11, Bjørn Jørgensen <
> >>> bjornjorgensen@gmail.com>
> >>> > wrote:
> >>> >
> >>> > > Hi, I have built and running spark on k8s. A link to my repo
> >>> > > https://github.com/bjornjorgensen/jlpyk8s
> >>> > >
> >>> > > Everything seems to be running fine, but I can’t save to PVC.
> >>> > > If I convert the dataframe to pandas, then I can save it.
> >>> > >
> >>> > >
> >>> > >
> >>> > > from pyspark.sql import SparkSession
> >>> > > spark = SparkSession.builder \
> >>> > >     .master("k8s://https://kubernetes.default.svc.cluster.local:443")
> >>> \
> >>> > >     .config("spark.kubernetes.container.image",
> >>> > > "bjornjorgensen/spark-py:v3.2-290821") \
> >>> > >     .config("spark.kubernetes.authenticate.caCertFile",
> >>> "/var/run/secrets/
> >>> > > kubernetes.io/serviceaccount/ca.crt") \
> >>> > >     .config("spark.kubernetes.authenticate.oauthTokenFile",
> >>> > > "/var/run/secrets/kubernetes.io/serviceaccount/token") \
> >>> > >
> >>>  .config("spark.kubernetes.authenticate.driver.serviceAccountName",
> >>> > > "my-pyspark-notebook") \
> >>> > >     .config("spark.executor.instances", "10") \
> >>> > >     .config("spark.driver.host",
> >>> > > "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
> >>> > >     .config("spark.driver.port", "29413") \
> >>> > >
> >>> > >
> >>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName",
> >>> > > "nfs100") \
> >>> > >
> >>> > >
> >>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path",
> >>> > > "/opt/spark/work-dir") \
> >>> > >
> >>> > >
> >>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName",
> >>> > > "nfs100") \
> >>> > >
> >>> > >
> >>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path",
> >>> > > "/opt/spark/work-dir") \
> >>> > >
> >>> > >
> >>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> >>> > > "False") \
> >>> > >
> >>> > >
> >>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> >>> > > "False") \
> >>> > >     .appName("myApp") \
> >>> > >     .config("spark.sql.repl.eagerEval.enabled", "True") \
> >>> > >     .config("spark.driver.memory", "4g") \
> >>> > >     .config("spark.executor.memory", "4g") \
> >>> > >     .getOrCreate()
> >>> > > sc = spark.sparkContext
> >>> > >
> >>> > > pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> >>> > >
> >>> > >
> >>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
> >>> > > operation! Moving all data to a single partition, this can cause
> >>> serious
> >>> > > performance degradation.
> >>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
> >>> > > operation! Moving all data to a single partition, this can cause
> >>> serious
> >>> > > performance degradation.
> >>> > > 21/08/30 12:20:37 WARN WindowExec: No Partition Defined for Window
> >>> > > operation! Moving all data to a single partition, this can cause
> >>> serious
> >>> > > performance degradation.
> >>> > > 21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in stage 25.0
> >>> (TID
> >>> > > 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs failed to
> >>> create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in stage 25.0
> >>> (TID
> >>> > > 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in stage 25.0
> >>> (TID
> >>> > > 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in stage 25.0
> >>> (TID
> >>> > > 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs failed
> >>> to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0 failed 4
> >>> > > times; aborting job
> >>> > > 21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job
> >>> > > d98cdc60-bb44-4189-b483-8449fc793658.
> >>> > > org.apache.spark.SparkException: Job aborted due to stage failure:
> >>> Task 0
> >>> > > in stage 25.0 failed 4 times, most recent failure: Lost task 0.3 in
> >>> stage
> >>> > > 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException:
> >>> Mkdirs
> >>> > > failed to create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > Driver stacktrace:
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> >>> > >         at
> >>> > >
> >>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> >>> > >         at
> >>> > >
> >>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> >>> > >         at
> >>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> >>> > >         at scala.Option.foreach(Option.scala:407)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> >>> > >         at
> >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> >>> > >         at
> >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> >>> > >         at
> >>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> >>> > >         at
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> >>> > >
> >>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> >>> > >         at
> >>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> >>> > > Method)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> >>> > >         at
> >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> >>> > >         at
> >>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
> >>> > >         at
> >>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> >>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> >>> > >         at
> >>> > >
> >>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> >>> > >         at
> >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> >>> > > Caused by: java.io.IOException: Mkdirs failed to create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> ---------------------------------------------------------------------------
> >>> > > Py4JJavaError                             Traceback (most recent
> >>> call last)
> >>> > > /tmp/ipykernel_80/163396320.py in <module>
> >>> > > ----> 1
> >>> pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> >>> > >
> >>> > > /opt/spark/python/pyspark/pandas/frame.py in to_parquet(self, path,
> >>> mode,
> >>> > > partition_cols, compression, index_col, **options)
> >>> > >    4721         if compression is not None:
> >>> > >    4722             builder.option("compression", compression)
> >>> > > -> 4723
> >>>  builder.options(**options).format("parquet").save(path)
> >>> > >    4724
> >>> > >    4725     def to_orc(
> >>> > >
> >>> > > /opt/spark/python/pyspark/sql/readwriter.py in save(self, path,
> >>> format,
> >>> > > mode, partitionBy, **options)
> >>> > >     738             self._jwrite.save()
> >>> > >     739         else:
> >>> > > --> 740             self._jwrite.save(path)
> >>> > >     741
> >>> > >     742     @since(1.4)
> >>> > >
> >>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
> >>> > > __call__(self, *args)
> >>> > >    1307
> >>> > >    1308         answer = self.gateway_client.send_command(command)
> >>> > > -> 1309         return_value = get_return_value(
> >>> > >    1310             answer, self.gateway_client, self.target_id,
> >>> self.name
> >>> > > )
> >>> > >    1311
> >>> > >
> >>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
> >>> > >     109     def deco(*a, **kw):
> >>> > >     110         try:
> >>> > > --> 111             return f(*a, **kw)
> >>> > >     112         except py4j.protocol.Py4JJavaError as e:
> >>> > >     113             converted = convert_exception(e.java_exception)
> >>> > >
> >>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
> >>> > > get_return_value(answer, gateway_client, target_id, name)
> >>> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
> >>> > > gateway_client)
> >>> > >     325             if answer[1] == REFERENCE_TYPE:
> >>> > > --> 326                 raise Py4JJavaError(
> >>> > >     327                     "An error occurred while calling
> >>> {0}{1}{2}.\n".
> >>> > >     328                     format(target_id, ".", name), value)
> >>> > >
> >>> > > Py4JJavaError: An error occurred while calling o4804.save.
> >>> > > : org.apache.spark.SparkException: Job aborted.
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> >>> > >         at
> >>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> >>> > >         at
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> >>> > >
> >>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> >>> > >         at
> >>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> >>> > > Method)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> >>> > >         at
> >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> >>> > >         at
> >>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
> >>> > >         at
> >>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> >>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> >>> > >         at
> >>> > >
> >>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> >>> > >         at
> >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> >>> > > Caused by: org.apache.spark.SparkException: Job aborted due to stage
> >>> > > failure: Task 0 in stage 25.0 failed 4 times, most recent failure:
> >>> Lost
> >>> > > task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10):
> >>> > > java.io.IOException: Mkdirs failed to create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > Driver stacktrace:
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> >>> > >         at
> >>> > >
> >>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> >>> > >         at
> >>> > >
> >>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> >>> > >         at
> >>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> >>> > >         at scala.Option.foreach(Option.scala:407)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> >>> > >         at
> >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> >>> > >         at
> >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> >>> > >         ... 41 more
> >>> > > Caused by: java.io.IOException: Mkdirs failed to create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > >
> >>> > >
> >>> > >
> >>> > > df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
> >>> > > mode="overwrite")
> >>> > >
> >>> > >
> >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in stage 26.0
> >>> (TID
> >>> > > 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in stage 26.0
> >>> (TID
> >>> > > 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in stage 26.0
> >>> (TID
> >>> > > 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
> >>> create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in stage 26.0
> >>> (TID
> >>> > > 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in stage 26.0
> >>> (TID
> >>> > > 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.1 in stage 26.0
> >>> (TID
> >>> > > 9553) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
> >>> create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303560179987922074406_0026_m_000001_9553
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.2 in stage 26.0
> >>> (TID
> >>> > > 9556) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
> >>> create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906307237274992181823763_0026_m_000002_9556
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.1 in stage 26.0
> >>> (TID
> >>> > > 9554) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306864123848918470508_0026_m_000004_9554
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.1 in stage 26.0
> >>> (TID
> >>> > > 9555) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302601970797047480301_0026_m_000010_9555
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.0 in stage 26.0
> >>> (TID
> >>> > > 9541) (10.42.192.9 executor 8): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304154332762277555982_0026_m_000000_9541
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.0 in stage 26.0
> >>> (TID
> >>> > > 9548) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630694656629969727231_0026_m_000007_9548
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.2 in stage 26.0
> >>> (TID
> >>> > > 9559) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303848774759656984701_0026_m_000004_9559
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.3 in stage 26.0
> >>> (TID
> >>> > > 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 ERROR TaskSetManager: Task 2 in stage 26.0 failed 4
> >>> > > times; aborting job
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 1.2 in stage 26.0
> >>> (TID
> >>> > > 9557) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
> >>> create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302130961573080351978_0026_m_000001_9557
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 8.0 in stage 26.0
> >>> (TID
> >>> > > 9549) (10.42.0.17 executor 6): java.io.IOException: Mkdirs failed to
> >>> create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306776907150898092479_0026_m_000008_9549
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 5.0 in stage 26.0
> >>> (TID
> >>> > > 9546) (10.42.96.9 executor 7): java.io.IOException: Mkdirs failed to
> >>> create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906301725332653584503335_0026_m_000005_9546
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 11.0 in stage 26.0
> >>> (TID
> >>> > > 9561) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302182889441465469285_0026_m_000011_9561
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.1 in stage 26.0
> >>> (TID
> >>> > > 9563) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 ERROR FileFormatWriter: Aborting job
> >>> > > 115cf3ce-5a9b-4274-8752-b6ead281f104.
> >>> > > org.apache.spark.SparkException: Job aborted due to stage failure:
> >>> Task 2
> >>> > > in stage 26.0 failed 4 times, most recent failure: Lost task 2.3 in
> >>> stage
> >>> > > 26.0 (TID 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
> >>> > > failed to create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > Driver stacktrace:
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> >>> > >         at
> >>> > >
> >>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> >>> > >         at
> >>> > >
> >>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> >>> > >         at
> >>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> >>> > >         at scala.Option.foreach(Option.scala:407)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> >>> > >         at
> >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> >>> > >         at
> >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> >>> > >         at
> >>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> >>> > >         at
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> >>> > >
> >>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> >>> > >         at
> >>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> >>> > > Method)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> >>> > >         at
> >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> >>> > >         at
> >>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
> >>> > >         at
> >>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> >>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> >>> > >         at
> >>> > >
> >>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> >>> > >         at
> >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> >>> > > Caused by: java.io.IOException: Mkdirs failed to create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.1 in stage 26.0
> >>> (TID
> >>> > > 9562) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304432517406660432032_0026_m_000007_9562
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 9.0 in stage 26.0
> >>> (TID
> >>> > > 9550) (10.42.128.10 executor 9): java.io.IOException: Mkdirs failed
> >>> to
> >>> > > create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305148471752983202631_0026_m_000009_9550
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 14.0 in stage 26.0
> >>> (TID
> >>> > > 9566) (10.42.96.9 executor 7): TaskKilled (Stage cancelled)
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 6.0 in stage 26.0
> >>> (TID
> >>> > > 9547) (10.42.32.12 executor 11): TaskKilled (Stage cancelled)
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 13.0 in stage 26.0
> >>> (TID
> >>> > > 9565) (10.42.0.17 executor 6): TaskKilled (Stage cancelled)
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 12.0 in stage 26.0
> >>> (TID
> >>> > > 9564) (10.42.0.12 executor 3): TaskKilled (Stage cancelled)
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.2 in stage 26.0
> >>> (TID
> >>> > > 9560) (10.42.192.9 executor 8): TaskKilled (Stage cancelled)
> >>> > > [Stage 26:>                                                       (0
> >>> + 1)
> >>> > > / 132]
> >>> > >
> >>> ---------------------------------------------------------------------------
> >>> > > Py4JJavaError                             Traceback (most recent
> >>> call last)
> >>> > > /tmp/ipykernel_80/610855484.py in <module>
> >>> > > ----> 1
> >>> df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
> >>> > > mode="overwrite")
> >>> > >
> >>> > > /opt/spark/python/pyspark/sql/readwriter.py in parquet(self, path,
> >>> mode,
> >>> > > partitionBy, compression)
> >>> > >     883             self.partitionBy(partitionBy)
> >>> > >     884         self._set_opts(compression=compression)
> >>> > > --> 885         self._jwrite.parquet(path)
> >>> > >     886
> >>> > >     887     def text(self, path, compression=None, lineSep=None):
> >>> > >
> >>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
> >>> > > __call__(self, *args)
> >>> > >    1307
> >>> > >    1308         answer = self.gateway_client.send_command(command)
> >>> > > -> 1309         return_value = get_return_value(
> >>> > >    1310             answer, self.gateway_client, self.target_id,
> >>> self.name
> >>> > > )
> >>> > >    1311
> >>> > >
> >>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
> >>> > >     109     def deco(*a, **kw):
> >>> > >     110         try:
> >>> > > --> 111             return f(*a, **kw)
> >>> > >     112         except py4j.protocol.Py4JJavaError as e:
> >>> > >     113             converted = convert_exception(e.java_exception)
> >>> > >
> >>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
> >>> > > get_return_value(answer, gateway_client, target_id, name)
> >>> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
> >>> > > gateway_client)
> >>> > >     325             if answer[1] == REFERENCE_TYPE:
> >>> > > --> 326                 raise Py4JJavaError(
> >>> > >     327                     "An error occurred while calling
> >>> {0}{1}{2}.\n".
> >>> > >     328                     format(target_id, ".", name), value)
> >>> > >
> >>> > > Py4JJavaError: An error occurred while calling o15435.parquet.
> >>> > > : org.apache.spark.SparkException: Job aborted.
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> >>> > >         at
> >>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> >>> > >         at
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> >>> > >
> >>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> >>> > >         at
> >>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> >>> > > Method)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >>> > >         at
> >>> > >
> >>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> >>> > >         at
> >>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> >>> > >         at
> >>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> >>> > >         at py4j.Gateway.invoke(Gateway.java:282)
> >>> > >         at
> >>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> >>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> >>> > >         at
> >>> > >
> >>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> >>> > >         at
> >>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> >>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> >>> > > Caused by: org.apache.spark.SparkException: Job aborted due to stage
> >>> > > failure: Task 2 in stage 26.0 failed 4 times, most recent failure:
> >>> Lost
> >>> > > task 2.3 in stage 26.0 (TID 9558) (10.42.240.3 executor 1):
> >>> > > java.io.IOException: Mkdirs failed to create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > >
> >>> > > Driver stacktrace:
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> >>> > >         at
> >>> > >
> >>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> >>> > >         at
> >>> > >
> >>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> >>> > >         at
> >>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> >>> > >         at scala.Option.foreach(Option.scala:407)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> >>> > >         at
> >>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> >>> > >         at
> >>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> >>> > >         ... 42 more
> >>> > > Caused by: java.io.IOException: Mkdirs failed to create
> >>> > >
> >>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> >>> > > (exists=false, cwd=file:/opt/spark/work-dir)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >>> > >         at
> >>> > >
> >>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >>> > >         at
> >>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >>> > >         at
> >>> > >
> >>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >>> > >         at
> >>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >>> > >         at
> >>> > >
> >>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >>> > >         at
> >>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >>> > >         at
> >>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> >>> Source)
> >>> > >         at
> >>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> >>> Source)
> >>> > >         at java.base/java.lang.Thread.run(Unknown Source)
> >>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 3.0 in stage 26.0
> >>> (TID
> >>> > > 9544) (10.42.0.18 executor 12): TaskKilled (Stage cancelled)
> >>> > >
> >>> > >
> >>> > >
> >>> > >
> >>> > >
> >>> > >
> >>> > >
> >>> > >
> >>> > >
> >>> > >
> >>> > > ---------------------------------------------------------------------
> >>> > > To unsubscribe e-mail: user-unsubscribe@spark.apache.org
> >>> > >
> >>> > >
> >>> >
> >>>
> >>> ---------------------------------------------------------------------
> >>> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
> >>>
> >>>
> 

---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscribe@spark.apache.org


Re: Can’t write to PVC in K8S

Posted by Mich Talebzadeh <mi...@gmail.com>.
To be specific uid=185 (spark user, AKA anonymous) and root are in the same
group in the docker image itself


id

uid=185(185) gid=0(root) groups=0(root)


So in the docker image conf file, you can create your permanent directory
as root off /home say

do it as root (USER 0)


RUN mkdir -p /home/<MY-DIR>

RUN chmod g+w /home/<MY-DIR>  ## give write permission to spark


ARG spark_uid=185
..................

# Specify the User that the actual main process will run as

USER ${spark_uid}


   view my Linkedin profile
<https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>



*Disclaimer:* Use it at your own risk. Any and all responsibility for any
loss, damage or destruction of data or any other property which may arise
from relying on this email's technical content is explicitly disclaimed.
The author will in no case be liable for any monetary damages arising from
such loss, damage or destruction.




On Mon, 30 Aug 2021 at 22:26, Mich Talebzadeh <mi...@gmail.com>
wrote:

> Forgot to mention that Spark uses that work directory to unzip the zipped
> files or gunzip archive files
>
> For example
>
> pyFiles                 gs://axial-glow-224522-spark-on-k8s/codes/DSBQ.zip
>
>
> Spark will use that $SPARK_HOME/work-dir to unzip DSBQ.zip which is the
> application package here
>
>
> The alternative is to hack the docker file to create a directory for
> yourself
>
>
> RUN mkdir -p /home/conf
>
> RUN chmod g+w /home/conf
>
>
> HTH
>
>
> *Disclaimer:* Use it at your own risk. Any and all responsibility for any
> loss, damage or destruction of data or any other property which may arise
> from relying on this email's technical content is explicitly disclaimed.
> The author will in no case be liable for any monetary damages arising from
> such loss, damage or destruction.
>
>
>
>
>
>
> On Mon, 30 Aug 2021 at 22:13, Mich Talebzadeh <mi...@gmail.com>
> wrote:
>
>> I am not familiar with  jupyterlab  so cannot comment on that.
>>
>> However, once your parquet file is written to the work-dir, how are you
>> going to utilise it?
>>
>> HTH
>>
>>
>>
>>
>>    view my Linkedin profile
>> <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
>>
>>
>>
>> *Disclaimer:* Use it at your own risk. Any and all responsibility for
>> any loss, damage or destruction of data or any other property which may
>> arise from relying on this email's technical content is explicitly
>> disclaimed. The author will in no case be liable for any monetary damages
>> arising from such loss, damage or destruction.
>>
>>
>>
>>
>> On Mon, 30 Aug 2021 at 22:05, Bjørn Jørgensen <bj...@gmail.com>
>> wrote:
>>
>>> ok, so when I use spark on k8s I can only save files to s3 buckets or to
>>> a database?
>>>
>>> Note my setup, its spark with jupyterlab on top on k8s.
>>>
>>> What are those for if I cant write files from spark in k8s to disk?
>>>
>>> "spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>>> "False"
>>> "spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>>> "False"
>>>
>>> On 2021/08/30 20:50:22, Mich Talebzadeh <mi...@gmail.com>
>>> wrote:
>>> > Hi,
>>> >
>>> > You are trying to write to work-dir inside the docker and create
>>> > sub-directories:
>>> >
>>> > The error you are getting is this
>>> >
>>> > Mkdirs failed to create
>>> >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
>>> > (exists=false, cwd=file:/opt/spark/work-dir)
>>> >
>>> > That directory /work-dir is not recognised as a valid directory
>>> > for storage. It is not in HDFS or HCFS format
>>> >
>>> >
>>> > From Spark you can write to a bucket outside as a permanent storage.
>>> >
>>> > HTH
>>> >
>>> >
>>> >    view my Linkedin profile
>>> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
>>> >
>>> >
>>> >
>>> > *Disclaimer:* Use it at your own risk. Any and all responsibility for
>>> any
>>> > loss, damage or destruction of data or any other property which may
>>> arise
>>> > from relying on this email's technical content is explicitly
>>> disclaimed.
>>> > The author will in no case be liable for any monetary damages arising
>>> from
>>> > such loss, damage or destruction.
>>> >
>>> >
>>> >
>>> >
>>> > On Mon, 30 Aug 2021 at 14:11, Bjørn Jørgensen <
>>> bjornjorgensen@gmail.com>
>>> > wrote:
>>> >
>>> > > Hi, I have built and running spark on k8s. A link to my repo
>>> > > https://github.com/bjornjorgensen/jlpyk8s
>>> > >
>>> > > Everything seems to be running fine, but I can’t save to PVC.
>>> > > If I convert the dataframe to pandas, then I can save it.
>>> > >
>>> > >
>>> > >
>>> > > from pyspark.sql import SparkSession
>>> > > spark = SparkSession.builder \
>>> > >     .master("k8s://https://kubernetes.default.svc.cluster.local:443")
>>> \
>>> > >     .config("spark.kubernetes.container.image",
>>> > > "bjornjorgensen/spark-py:v3.2-290821") \
>>> > >     .config("spark.kubernetes.authenticate.caCertFile",
>>> "/var/run/secrets/
>>> > > kubernetes.io/serviceaccount/ca.crt") \
>>> > >     .config("spark.kubernetes.authenticate.oauthTokenFile",
>>> > > "/var/run/secrets/kubernetes.io/serviceaccount/token") \
>>> > >
>>>  .config("spark.kubernetes.authenticate.driver.serviceAccountName",
>>> > > "my-pyspark-notebook") \
>>> > >     .config("spark.executor.instances", "10") \
>>> > >     .config("spark.driver.host",
>>> > > "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
>>> > >     .config("spark.driver.port", "29413") \
>>> > >
>>> > >
>>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName",
>>> > > "nfs100") \
>>> > >
>>> > >
>>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path",
>>> > > "/opt/spark/work-dir") \
>>> > >
>>> > >
>>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName",
>>> > > "nfs100") \
>>> > >
>>> > >
>>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path",
>>> > > "/opt/spark/work-dir") \
>>> > >
>>> > >
>>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>>> > > "False") \
>>> > >
>>> > >
>>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>>> > > "False") \
>>> > >     .appName("myApp") \
>>> > >     .config("spark.sql.repl.eagerEval.enabled", "True") \
>>> > >     .config("spark.driver.memory", "4g") \
>>> > >     .config("spark.executor.memory", "4g") \
>>> > >     .getOrCreate()
>>> > > sc = spark.sparkContext
>>> > >
>>> > > pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
>>> > >
>>> > >
>>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
>>> > > operation! Moving all data to a single partition, this can cause
>>> serious
>>> > > performance degradation.
>>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
>>> > > operation! Moving all data to a single partition, this can cause
>>> serious
>>> > > performance degradation.
>>> > > 21/08/30 12:20:37 WARN WindowExec: No Partition Defined for Window
>>> > > operation! Moving all data to a single partition, this can cause
>>> serious
>>> > > performance degradation.
>>> > > 21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in stage 25.0
>>> (TID
>>> > > 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs failed to
>>> create
>>> > >
>>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in stage 25.0
>>> (TID
>>> > > 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in stage 25.0
>>> (TID
>>> > > 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in stage 25.0
>>> (TID
>>> > > 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs failed
>>> to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0 failed 4
>>> > > times; aborting job
>>> > > 21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job
>>> > > d98cdc60-bb44-4189-b483-8449fc793658.
>>> > > org.apache.spark.SparkException: Job aborted due to stage failure:
>>> Task 0
>>> > > in stage 25.0 failed 4 times, most recent failure: Lost task 0.3 in
>>> stage
>>> > > 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException:
>>> Mkdirs
>>> > > failed to create
>>> > >
>>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > Driver stacktrace:
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>>> > >         at
>>> > >
>>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>>> > >         at
>>> > >
>>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>>> > >         at
>>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>>> > >         at scala.Option.foreach(Option.scala:407)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>>> > >         at
>>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>>> > >         at
>>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>>> > >         at
>>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>>> > >         at
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>>> > >
>>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>>> > >         at
>>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>>> > > Method)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>>> > >         at
>>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>>> > >         at
>>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>>> > >         at
>>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>>> > >         at
>>> > >
>>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>>> > >         at
>>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>>> > > Caused by: java.io.IOException: Mkdirs failed to create
>>> > >
>>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> ---------------------------------------------------------------------------
>>> > > Py4JJavaError                             Traceback (most recent
>>> call last)
>>> > > /tmp/ipykernel_80/163396320.py in <module>
>>> > > ----> 1
>>> pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
>>> > >
>>> > > /opt/spark/python/pyspark/pandas/frame.py in to_parquet(self, path,
>>> mode,
>>> > > partition_cols, compression, index_col, **options)
>>> > >    4721         if compression is not None:
>>> > >    4722             builder.option("compression", compression)
>>> > > -> 4723
>>>  builder.options(**options).format("parquet").save(path)
>>> > >    4724
>>> > >    4725     def to_orc(
>>> > >
>>> > > /opt/spark/python/pyspark/sql/readwriter.py in save(self, path,
>>> format,
>>> > > mode, partitionBy, **options)
>>> > >     738             self._jwrite.save()
>>> > >     739         else:
>>> > > --> 740             self._jwrite.save(path)
>>> > >     741
>>> > >     742     @since(1.4)
>>> > >
>>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
>>> > > __call__(self, *args)
>>> > >    1307
>>> > >    1308         answer = self.gateway_client.send_command(command)
>>> > > -> 1309         return_value = get_return_value(
>>> > >    1310             answer, self.gateway_client, self.target_id,
>>> self.name
>>> > > )
>>> > >    1311
>>> > >
>>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
>>> > >     109     def deco(*a, **kw):
>>> > >     110         try:
>>> > > --> 111             return f(*a, **kw)
>>> > >     112         except py4j.protocol.Py4JJavaError as e:
>>> > >     113             converted = convert_exception(e.java_exception)
>>> > >
>>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
>>> > > get_return_value(answer, gateway_client, target_id, name)
>>> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
>>> > > gateway_client)
>>> > >     325             if answer[1] == REFERENCE_TYPE:
>>> > > --> 326                 raise Py4JJavaError(
>>> > >     327                     "An error occurred while calling
>>> {0}{1}{2}.\n".
>>> > >     328                     format(target_id, ".", name), value)
>>> > >
>>> > > Py4JJavaError: An error occurred while calling o4804.save.
>>> > > : org.apache.spark.SparkException: Job aborted.
>>> > >         at
>>> > >
>>> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>>> > >         at
>>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>>> > >         at
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>>> > >
>>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>>> > >         at
>>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>>> > > Method)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>>> > >         at
>>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>>> > >         at
>>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>>> > >         at
>>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>>> > >         at
>>> > >
>>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>>> > >         at
>>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>>> > > Caused by: org.apache.spark.SparkException: Job aborted due to stage
>>> > > failure: Task 0 in stage 25.0 failed 4 times, most recent failure:
>>> Lost
>>> > > task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10):
>>> > > java.io.IOException: Mkdirs failed to create
>>> > >
>>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > Driver stacktrace:
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>>> > >         at
>>> > >
>>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>>> > >         at
>>> > >
>>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>>> > >         at
>>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>>> > >         at scala.Option.foreach(Option.scala:407)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>>> > >         at
>>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>>> > >         at
>>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>>> > >         ... 41 more
>>> > > Caused by: java.io.IOException: Mkdirs failed to create
>>> > >
>>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > >
>>> > >
>>> > >
>>> > > df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
>>> > > mode="overwrite")
>>> > >
>>> > >
>>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in stage 26.0
>>> (TID
>>> > > 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in stage 26.0
>>> (TID
>>> > > 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in stage 26.0
>>> (TID
>>> > > 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>>> create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in stage 26.0
>>> (TID
>>> > > 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in stage 26.0
>>> (TID
>>> > > 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.1 in stage 26.0
>>> (TID
>>> > > 9553) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>>> create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303560179987922074406_0026_m_000001_9553
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.2 in stage 26.0
>>> (TID
>>> > > 9556) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>>> create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906307237274992181823763_0026_m_000002_9556
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.1 in stage 26.0
>>> (TID
>>> > > 9554) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306864123848918470508_0026_m_000004_9554
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.1 in stage 26.0
>>> (TID
>>> > > 9555) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302601970797047480301_0026_m_000010_9555
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.0 in stage 26.0
>>> (TID
>>> > > 9541) (10.42.192.9 executor 8): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304154332762277555982_0026_m_000000_9541
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.0 in stage 26.0
>>> (TID
>>> > > 9548) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630694656629969727231_0026_m_000007_9548
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.2 in stage 26.0
>>> (TID
>>> > > 9559) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303848774759656984701_0026_m_000004_9559
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.3 in stage 26.0
>>> (TID
>>> > > 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 ERROR TaskSetManager: Task 2 in stage 26.0 failed 4
>>> > > times; aborting job
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 1.2 in stage 26.0
>>> (TID
>>> > > 9557) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>>> create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302130961573080351978_0026_m_000001_9557
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 8.0 in stage 26.0
>>> (TID
>>> > > 9549) (10.42.0.17 executor 6): java.io.IOException: Mkdirs failed to
>>> create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306776907150898092479_0026_m_000008_9549
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 5.0 in stage 26.0
>>> (TID
>>> > > 9546) (10.42.96.9 executor 7): java.io.IOException: Mkdirs failed to
>>> create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906301725332653584503335_0026_m_000005_9546
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 11.0 in stage 26.0
>>> (TID
>>> > > 9561) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302182889441465469285_0026_m_000011_9561
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.1 in stage 26.0
>>> (TID
>>> > > 9563) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 ERROR FileFormatWriter: Aborting job
>>> > > 115cf3ce-5a9b-4274-8752-b6ead281f104.
>>> > > org.apache.spark.SparkException: Job aborted due to stage failure:
>>> Task 2
>>> > > in stage 26.0 failed 4 times, most recent failure: Lost task 2.3 in
>>> stage
>>> > > 26.0 (TID 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
>>> > > failed to create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > Driver stacktrace:
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>>> > >         at
>>> > >
>>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>>> > >         at
>>> > >
>>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>>> > >         at
>>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>>> > >         at scala.Option.foreach(Option.scala:407)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>>> > >         at
>>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>>> > >         at
>>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>>> > >         at
>>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>>> > >         at
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>>> > >
>>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>>> > >         at
>>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>>> > > Method)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>>> > >         at
>>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>>> > >         at
>>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>>> > >         at
>>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>>> > >         at
>>> > >
>>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>>> > >         at
>>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>>> > > Caused by: java.io.IOException: Mkdirs failed to create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.1 in stage 26.0
>>> (TID
>>> > > 9562) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304432517406660432032_0026_m_000007_9562
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 9.0 in stage 26.0
>>> (TID
>>> > > 9550) (10.42.128.10 executor 9): java.io.IOException: Mkdirs failed
>>> to
>>> > > create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305148471752983202631_0026_m_000009_9550
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 14.0 in stage 26.0
>>> (TID
>>> > > 9566) (10.42.96.9 executor 7): TaskKilled (Stage cancelled)
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 6.0 in stage 26.0
>>> (TID
>>> > > 9547) (10.42.32.12 executor 11): TaskKilled (Stage cancelled)
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 13.0 in stage 26.0
>>> (TID
>>> > > 9565) (10.42.0.17 executor 6): TaskKilled (Stage cancelled)
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 12.0 in stage 26.0
>>> (TID
>>> > > 9564) (10.42.0.12 executor 3): TaskKilled (Stage cancelled)
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.2 in stage 26.0
>>> (TID
>>> > > 9560) (10.42.192.9 executor 8): TaskKilled (Stage cancelled)
>>> > > [Stage 26:>                                                       (0
>>> + 1)
>>> > > / 132]
>>> > >
>>> ---------------------------------------------------------------------------
>>> > > Py4JJavaError                             Traceback (most recent
>>> call last)
>>> > > /tmp/ipykernel_80/610855484.py in <module>
>>> > > ----> 1
>>> df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
>>> > > mode="overwrite")
>>> > >
>>> > > /opt/spark/python/pyspark/sql/readwriter.py in parquet(self, path,
>>> mode,
>>> > > partitionBy, compression)
>>> > >     883             self.partitionBy(partitionBy)
>>> > >     884         self._set_opts(compression=compression)
>>> > > --> 885         self._jwrite.parquet(path)
>>> > >     886
>>> > >     887     def text(self, path, compression=None, lineSep=None):
>>> > >
>>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
>>> > > __call__(self, *args)
>>> > >    1307
>>> > >    1308         answer = self.gateway_client.send_command(command)
>>> > > -> 1309         return_value = get_return_value(
>>> > >    1310             answer, self.gateway_client, self.target_id,
>>> self.name
>>> > > )
>>> > >    1311
>>> > >
>>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
>>> > >     109     def deco(*a, **kw):
>>> > >     110         try:
>>> > > --> 111             return f(*a, **kw)
>>> > >     112         except py4j.protocol.Py4JJavaError as e:
>>> > >     113             converted = convert_exception(e.java_exception)
>>> > >
>>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
>>> > > get_return_value(answer, gateway_client, target_id, name)
>>> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
>>> > > gateway_client)
>>> > >     325             if answer[1] == REFERENCE_TYPE:
>>> > > --> 326                 raise Py4JJavaError(
>>> > >     327                     "An error occurred while calling
>>> {0}{1}{2}.\n".
>>> > >     328                     format(target_id, ".", name), value)
>>> > >
>>> > > Py4JJavaError: An error occurred while calling o15435.parquet.
>>> > > : org.apache.spark.SparkException: Job aborted.
>>> > >         at
>>> > >
>>> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>>> > >         at
>>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>>> > >         at
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>>> > >
>>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>>> > >         at
>>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>>> > > Method)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>>> > >         at
>>> > >
>>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>>> > >         at
>>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>>> > >         at
>>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>>> > >         at
>>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>>> > >         at
>>> > >
>>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>>> > >         at
>>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>>> > > Caused by: org.apache.spark.SparkException: Job aborted due to stage
>>> > > failure: Task 2 in stage 26.0 failed 4 times, most recent failure:
>>> Lost
>>> > > task 2.3 in stage 26.0 (TID 9558) (10.42.240.3 executor 1):
>>> > > java.io.IOException: Mkdirs failed to create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > >
>>> > > Driver stacktrace:
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>>> > >         at
>>> > >
>>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>>> > >         at
>>> > >
>>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>>> > >         at
>>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>>> > >         at scala.Option.foreach(Option.scala:407)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>>> > >         at
>>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>>> > >         at
>>> > >
>>> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>>> > >         at
>>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>>> > >         ... 42 more
>>> > > Caused by: java.io.IOException: Mkdirs failed to create
>>> > >
>>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>>> > >         at
>>> > >
>>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>>> > >         at
>>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>>> > >         at
>>> > >
>>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>>> > >         at
>>> > >
>>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>>> > >         at
>>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>>> > >         at
>>> > >
>>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>>> > >         at
>>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>>> > >         at
>>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>>> Source)
>>> > >         at
>>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>>> Source)
>>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 3.0 in stage 26.0
>>> (TID
>>> > > 9544) (10.42.0.18 executor 12): TaskKilled (Stage cancelled)
>>> > >
>>> > >
>>> > >
>>> > >
>>> > >
>>> > >
>>> > >
>>> > >
>>> > >
>>> > >
>>> > > ---------------------------------------------------------------------
>>> > > To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>>> > >
>>> > >
>>> >
>>>
>>> ---------------------------------------------------------------------
>>> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>>>
>>>

Re: Can’t write to PVC in K8S

Posted by Mich Talebzadeh <mi...@gmail.com>.
Forgot to mention that Spark uses that work directory to unzip the zipped
files or gunzip archive files

For example

pyFiles                 gs://axial-glow-224522-spark-on-k8s/codes/DSBQ.zip


Spark will use that $SPARK_HOME/work-dir to unzip DSBQ.zip which is the
application package here


The alternative is to hack the docker file to create a directory for
yourself


RUN mkdir -p /home/conf

RUN chmod g+w /home/conf


HTH


*Disclaimer:* Use it at your own risk. Any and all responsibility for any
loss, damage or destruction of data or any other property which may arise
from relying on this email's technical content is explicitly disclaimed.
The author will in no case be liable for any monetary damages arising from
such loss, damage or destruction.






On Mon, 30 Aug 2021 at 22:13, Mich Talebzadeh <mi...@gmail.com>
wrote:

> I am not familiar with  jupyterlab  so cannot comment on that.
>
> However, once your parquet file is written to the work-dir, how are you
> going to utilise it?
>
> HTH
>
>
>
>
>    view my Linkedin profile
> <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
>
>
>
> *Disclaimer:* Use it at your own risk. Any and all responsibility for any
> loss, damage or destruction of data or any other property which may arise
> from relying on this email's technical content is explicitly disclaimed.
> The author will in no case be liable for any monetary damages arising from
> such loss, damage or destruction.
>
>
>
>
> On Mon, 30 Aug 2021 at 22:05, Bjørn Jørgensen <bj...@gmail.com>
> wrote:
>
>> ok, so when I use spark on k8s I can only save files to s3 buckets or to
>> a database?
>>
>> Note my setup, its spark with jupyterlab on top on k8s.
>>
>> What are those for if I cant write files from spark in k8s to disk?
>>
>> "spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> "False"
>> "spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> "False"
>>
>> On 2021/08/30 20:50:22, Mich Talebzadeh <mi...@gmail.com>
>> wrote:
>> > Hi,
>> >
>> > You are trying to write to work-dir inside the docker and create
>> > sub-directories:
>> >
>> > The error you are getting is this
>> >
>> > Mkdirs failed to create
>> >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
>> > (exists=false, cwd=file:/opt/spark/work-dir)
>> >
>> > That directory /work-dir is not recognised as a valid directory
>> > for storage. It is not in HDFS or HCFS format
>> >
>> >
>> > From Spark you can write to a bucket outside as a permanent storage.
>> >
>> > HTH
>> >
>> >
>> >    view my Linkedin profile
>> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
>> >
>> >
>> >
>> > *Disclaimer:* Use it at your own risk. Any and all responsibility for
>> any
>> > loss, damage or destruction of data or any other property which may
>> arise
>> > from relying on this email's technical content is explicitly disclaimed.
>> > The author will in no case be liable for any monetary damages arising
>> from
>> > such loss, damage or destruction.
>> >
>> >
>> >
>> >
>> > On Mon, 30 Aug 2021 at 14:11, Bjørn Jørgensen <bjornjorgensen@gmail.com
>> >
>> > wrote:
>> >
>> > > Hi, I have built and running spark on k8s. A link to my repo
>> > > https://github.com/bjornjorgensen/jlpyk8s
>> > >
>> > > Everything seems to be running fine, but I can’t save to PVC.
>> > > If I convert the dataframe to pandas, then I can save it.
>> > >
>> > >
>> > >
>> > > from pyspark.sql import SparkSession
>> > > spark = SparkSession.builder \
>> > >     .master("k8s://https://kubernetes.default.svc.cluster.local:443")
>> \
>> > >     .config("spark.kubernetes.container.image",
>> > > "bjornjorgensen/spark-py:v3.2-290821") \
>> > >     .config("spark.kubernetes.authenticate.caCertFile",
>> "/var/run/secrets/
>> > > kubernetes.io/serviceaccount/ca.crt") \
>> > >     .config("spark.kubernetes.authenticate.oauthTokenFile",
>> > > "/var/run/secrets/kubernetes.io/serviceaccount/token") \
>> > >     .config("spark.kubernetes.authenticate.driver.serviceAccountName",
>> > > "my-pyspark-notebook") \
>> > >     .config("spark.executor.instances", "10") \
>> > >     .config("spark.driver.host",
>> > > "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
>> > >     .config("spark.driver.port", "29413") \
>> > >
>> > >
>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName",
>> > > "nfs100") \
>> > >
>> > >
>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path",
>> > > "/opt/spark/work-dir") \
>> > >
>> > >
>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName",
>> > > "nfs100") \
>> > >
>> > >
>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path",
>> > > "/opt/spark/work-dir") \
>> > >
>> > >
>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> > > "False") \
>> > >
>> > >
>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> > > "False") \
>> > >     .appName("myApp") \
>> > >     .config("spark.sql.repl.eagerEval.enabled", "True") \
>> > >     .config("spark.driver.memory", "4g") \
>> > >     .config("spark.executor.memory", "4g") \
>> > >     .getOrCreate()
>> > > sc = spark.sparkContext
>> > >
>> > > pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
>> > >
>> > >
>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
>> > > operation! Moving all data to a single partition, this can cause
>> serious
>> > > performance degradation.
>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
>> > > operation! Moving all data to a single partition, this can cause
>> serious
>> > > performance degradation.
>> > > 21/08/30 12:20:37 WARN WindowExec: No Partition Defined for Window
>> > > operation! Moving all data to a single partition, this can cause
>> serious
>> > > performance degradation.
>> > > 21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in stage 25.0
>> (TID
>> > > 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in stage 25.0
>> (TID
>> > > 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in stage 25.0
>> (TID
>> > > 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in stage 25.0
>> (TID
>> > > 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0 failed 4
>> > > times; aborting job
>> > > 21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job
>> > > d98cdc60-bb44-4189-b483-8449fc793658.
>> > > org.apache.spark.SparkException: Job aborted due to stage failure:
>> Task 0
>> > > in stage 25.0 failed 4 times, most recent failure: Lost task 0.3 in
>> stage
>> > > 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs
>> > > failed to create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > Driver stacktrace:
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>> > >         at
>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>> > >         at scala.Option.foreach(Option.scala:407)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>> > >         at
>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>> > >         at
>> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>> > >         at
>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>> > >         at
>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>> > >         at
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>> > >
>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>> > >         at
>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>> > >         at
>> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> > > Method)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>> > >         at
>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> > >         at
>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>> > >         at
>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>> > >         at
>> > >
>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>> > >         at
>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>> > > Caused by: java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> ---------------------------------------------------------------------------
>> > > Py4JJavaError                             Traceback (most recent call
>> last)
>> > > /tmp/ipykernel_80/163396320.py in <module>
>> > > ----> 1
>> pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
>> > >
>> > > /opt/spark/python/pyspark/pandas/frame.py in to_parquet(self, path,
>> mode,
>> > > partition_cols, compression, index_col, **options)
>> > >    4721         if compression is not None:
>> > >    4722             builder.option("compression", compression)
>> > > -> 4723
>>  builder.options(**options).format("parquet").save(path)
>> > >    4724
>> > >    4725     def to_orc(
>> > >
>> > > /opt/spark/python/pyspark/sql/readwriter.py in save(self, path,
>> format,
>> > > mode, partitionBy, **options)
>> > >     738             self._jwrite.save()
>> > >     739         else:
>> > > --> 740             self._jwrite.save(path)
>> > >     741
>> > >     742     @since(1.4)
>> > >
>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
>> > > __call__(self, *args)
>> > >    1307
>> > >    1308         answer = self.gateway_client.send_command(command)
>> > > -> 1309         return_value = get_return_value(
>> > >    1310             answer, self.gateway_client, self.target_id,
>> self.name
>> > > )
>> > >    1311
>> > >
>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
>> > >     109     def deco(*a, **kw):
>> > >     110         try:
>> > > --> 111             return f(*a, **kw)
>> > >     112         except py4j.protocol.Py4JJavaError as e:
>> > >     113             converted = convert_exception(e.java_exception)
>> > >
>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
>> > > get_return_value(answer, gateway_client, target_id, name)
>> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
>> > > gateway_client)
>> > >     325             if answer[1] == REFERENCE_TYPE:
>> > > --> 326                 raise Py4JJavaError(
>> > >     327                     "An error occurred while calling
>> {0}{1}{2}.\n".
>> > >     328                     format(target_id, ".", name), value)
>> > >
>> > > Py4JJavaError: An error occurred while calling o4804.save.
>> > > : org.apache.spark.SparkException: Job aborted.
>> > >         at
>> > >
>> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>> > >         at
>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>> > >         at
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>> > >
>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>> > >         at
>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>> > >         at
>> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> > > Method)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>> > >         at
>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> > >         at
>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>> > >         at
>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>> > >         at
>> > >
>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>> > >         at
>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>> > > Caused by: org.apache.spark.SparkException: Job aborted due to stage
>> > > failure: Task 0 in stage 25.0 failed 4 times, most recent failure:
>> Lost
>> > > task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10):
>> > > java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > Driver stacktrace:
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>> > >         at
>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>> > >         at scala.Option.foreach(Option.scala:407)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>> > >         at
>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>> > >         at
>> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>> > >         at
>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>> > >         ... 41 more
>> > > Caused by: java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > >
>> > >
>> > >
>> > > df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
>> > > mode="overwrite")
>> > >
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in stage 26.0
>> (TID
>> > > 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in stage 26.0
>> (TID
>> > > 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in stage 26.0
>> (TID
>> > > 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in stage 26.0
>> (TID
>> > > 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in stage 26.0
>> (TID
>> > > 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.1 in stage 26.0
>> (TID
>> > > 9553) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303560179987922074406_0026_m_000001_9553
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.2 in stage 26.0
>> (TID
>> > > 9556) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906307237274992181823763_0026_m_000002_9556
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.1 in stage 26.0
>> (TID
>> > > 9554) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306864123848918470508_0026_m_000004_9554
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.1 in stage 26.0
>> (TID
>> > > 9555) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302601970797047480301_0026_m_000010_9555
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.0 in stage 26.0
>> (TID
>> > > 9541) (10.42.192.9 executor 8): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304154332762277555982_0026_m_000000_9541
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.0 in stage 26.0
>> (TID
>> > > 9548) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630694656629969727231_0026_m_000007_9548
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.2 in stage 26.0
>> (TID
>> > > 9559) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303848774759656984701_0026_m_000004_9559
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.3 in stage 26.0
>> (TID
>> > > 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 ERROR TaskSetManager: Task 2 in stage 26.0 failed 4
>> > > times; aborting job
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 1.2 in stage 26.0
>> (TID
>> > > 9557) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302130961573080351978_0026_m_000001_9557
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 8.0 in stage 26.0
>> (TID
>> > > 9549) (10.42.0.17 executor 6): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306776907150898092479_0026_m_000008_9549
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 5.0 in stage 26.0
>> (TID
>> > > 9546) (10.42.96.9 executor 7): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906301725332653584503335_0026_m_000005_9546
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 11.0 in stage 26.0
>> (TID
>> > > 9561) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302182889441465469285_0026_m_000011_9561
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.1 in stage 26.0
>> (TID
>> > > 9563) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 ERROR FileFormatWriter: Aborting job
>> > > 115cf3ce-5a9b-4274-8752-b6ead281f104.
>> > > org.apache.spark.SparkException: Job aborted due to stage failure:
>> Task 2
>> > > in stage 26.0 failed 4 times, most recent failure: Lost task 2.3 in
>> stage
>> > > 26.0 (TID 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
>> > > failed to create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > Driver stacktrace:
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>> > >         at
>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>> > >         at scala.Option.foreach(Option.scala:407)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>> > >         at
>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>> > >         at
>> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>> > >         at
>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>> > >         at
>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>> > >         at
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>> > >
>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>> > >         at
>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
>> > >         at
>> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> > > Method)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>> > >         at
>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> > >         at
>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>> > >         at
>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>> > >         at
>> > >
>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>> > >         at
>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>> > > Caused by: java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.1 in stage 26.0
>> (TID
>> > > 9562) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304432517406660432032_0026_m_000007_9562
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 9.0 in stage 26.0
>> (TID
>> > > 9550) (10.42.128.10 executor 9): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305148471752983202631_0026_m_000009_9550
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 14.0 in stage 26.0
>> (TID
>> > > 9566) (10.42.96.9 executor 7): TaskKilled (Stage cancelled)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 6.0 in stage 26.0
>> (TID
>> > > 9547) (10.42.32.12 executor 11): TaskKilled (Stage cancelled)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 13.0 in stage 26.0
>> (TID
>> > > 9565) (10.42.0.17 executor 6): TaskKilled (Stage cancelled)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 12.0 in stage 26.0
>> (TID
>> > > 9564) (10.42.0.12 executor 3): TaskKilled (Stage cancelled)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.2 in stage 26.0
>> (TID
>> > > 9560) (10.42.192.9 executor 8): TaskKilled (Stage cancelled)
>> > > [Stage 26:>                                                       (0
>> + 1)
>> > > / 132]
>> > >
>> ---------------------------------------------------------------------------
>> > > Py4JJavaError                             Traceback (most recent call
>> last)
>> > > /tmp/ipykernel_80/610855484.py in <module>
>> > > ----> 1
>> df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
>> > > mode="overwrite")
>> > >
>> > > /opt/spark/python/pyspark/sql/readwriter.py in parquet(self, path,
>> mode,
>> > > partitionBy, compression)
>> > >     883             self.partitionBy(partitionBy)
>> > >     884         self._set_opts(compression=compression)
>> > > --> 885         self._jwrite.parquet(path)
>> > >     886
>> > >     887     def text(self, path, compression=None, lineSep=None):
>> > >
>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
>> > > __call__(self, *args)
>> > >    1307
>> > >    1308         answer = self.gateway_client.send_command(command)
>> > > -> 1309         return_value = get_return_value(
>> > >    1310             answer, self.gateway_client, self.target_id,
>> self.name
>> > > )
>> > >    1311
>> > >
>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
>> > >     109     def deco(*a, **kw):
>> > >     110         try:
>> > > --> 111             return f(*a, **kw)
>> > >     112         except py4j.protocol.Py4JJavaError as e:
>> > >     113             converted = convert_exception(e.java_exception)
>> > >
>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
>> > > get_return_value(answer, gateway_client, target_id, name)
>> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
>> > > gateway_client)
>> > >     325             if answer[1] == REFERENCE_TYPE:
>> > > --> 326                 raise Py4JJavaError(
>> > >     327                     "An error occurred while calling
>> {0}{1}{2}.\n".
>> > >     328                     format(target_id, ".", name), value)
>> > >
>> > > Py4JJavaError: An error occurred while calling o15435.parquet.
>> > > : org.apache.spark.SparkException: Job aborted.
>> > >         at
>> > >
>> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>> > >         at
>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>> > >         at
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>> > >
>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>> > >         at
>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
>> > >         at
>> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> > > Method)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>> > >         at
>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> > >         at
>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>> > >         at
>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>> > >         at
>> > >
>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>> > >         at
>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>> > > Caused by: org.apache.spark.SparkException: Job aborted due to stage
>> > > failure: Task 2 in stage 26.0 failed 4 times, most recent failure:
>> Lost
>> > > task 2.3 in stage 26.0 (TID 9558) (10.42.240.3 executor 1):
>> > > java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > Driver stacktrace:
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>> > >         at
>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>> > >         at scala.Option.foreach(Option.scala:407)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>> > >         at
>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>> > >         at
>> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>> > >         at
>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>> > >         ... 42 more
>> > > Caused by: java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 3.0 in stage 26.0
>> (TID
>> > > 9544) (10.42.0.18 executor 12): TaskKilled (Stage cancelled)
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >
>> > > ---------------------------------------------------------------------
>> > > To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>> > >
>> > >
>> >
>>
>> ---------------------------------------------------------------------
>> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>>
>>

Re: Can’t write to PVC in K8S

Posted by Holden Karau <ho...@pigscanfly.ca>.
You can totally write to PVCs, although it's a bit complicated. Since
you've got a shared NFS PVC that makes things a bit easier.

I think this is probably just a permission issue, the Spark user ID needs
to have read-write permission to that directory.
If that is the case, there are a few ways you can fix this (change the UID
Spark is running in your Dockerfile to match that of your NFS mount), give
the spark UID permission to create those folders, or use a sidecar to
create the workdir & chown/chgrp it over to Spark (kind of a pain but you
can see this pattern with Minio's setup).

On Mon, Aug 30, 2021 at 2:15 PM Mich Talebzadeh <mi...@gmail.com>
wrote:

> I am not familiar with  jupyterlab  so cannot comment on that.
>
> However, once your parquet file is written to the work-dir, how are you
> going to utilise it?
>
> HTH
>
>
>
>
>    view my Linkedin profile
> <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
>
>
>
> *Disclaimer:* Use it at your own risk. Any and all responsibility for any
> loss, damage or destruction of data or any other property which may arise
> from relying on this email's technical content is explicitly disclaimed.
> The author will in no case be liable for any monetary damages arising from
> such loss, damage or destruction.
>
>
>
>
> On Mon, 30 Aug 2021 at 22:05, Bjørn Jørgensen <bj...@gmail.com>
> wrote:
>
>> ok, so when I use spark on k8s I can only save files to s3 buckets or to
>> a database?
>>
>> Note my setup, its spark with jupyterlab on top on k8s.
>>
>> What are those for if I cant write files from spark in k8s to disk?
>>
>> "spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> "False"
>> "spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> "False"
>>
>> On 2021/08/30 20:50:22, Mich Talebzadeh <mi...@gmail.com>
>> wrote:
>> > Hi,
>> >
>> > You are trying to write to work-dir inside the docker and create
>> > sub-directories:
>> >
>> > The error you are getting is this
>> >
>> > Mkdirs failed to create
>> >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
>> > (exists=false, cwd=file:/opt/spark/work-dir)
>> >
>> > That directory /work-dir is not recognised as a valid directory
>> > for storage. It is not in HDFS or HCFS format
>> >
>> >
>> > From Spark you can write to a bucket outside as a permanent storage.
>> >
>> > HTH
>> >
>> >
>> >    view my Linkedin profile
>> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
>> >
>> >
>> >
>> > *Disclaimer:* Use it at your own risk. Any and all responsibility for
>> any
>> > loss, damage or destruction of data or any other property which may
>> arise
>> > from relying on this email's technical content is explicitly disclaimed.
>> > The author will in no case be liable for any monetary damages arising
>> from
>> > such loss, damage or destruction.
>> >
>> >
>> >
>> >
>> > On Mon, 30 Aug 2021 at 14:11, Bjørn Jørgensen <bjornjorgensen@gmail.com
>> >
>> > wrote:
>> >
>> > > Hi, I have built and running spark on k8s. A link to my repo
>> > > https://github.com/bjornjorgensen/jlpyk8s
>> > >
>> > > Everything seems to be running fine, but I can’t save to PVC.
>> > > If I convert the dataframe to pandas, then I can save it.
>> > >
>> > >
>> > >
>> > > from pyspark.sql import SparkSession
>> > > spark = SparkSession.builder \
>> > >     .master("k8s://https://kubernetes.default.svc.cluster.local:443")
>> \
>> > >     .config("spark.kubernetes.container.image",
>> > > "bjornjorgensen/spark-py:v3.2-290821") \
>> > >     .config("spark.kubernetes.authenticate.caCertFile",
>> "/var/run/secrets/
>> > > kubernetes.io/serviceaccount/ca.crt") \
>> > >     .config("spark.kubernetes.authenticate.oauthTokenFile",
>> > > "/var/run/secrets/kubernetes.io/serviceaccount/token") \
>> > >     .config("spark.kubernetes.authenticate.driver.serviceAccountName",
>> > > "my-pyspark-notebook") \
>> > >     .config("spark.executor.instances", "10") \
>> > >     .config("spark.driver.host",
>> > > "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
>> > >     .config("spark.driver.port", "29413") \
>> > >
>> > >
>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName",
>> > > "nfs100") \
>> > >
>> > >
>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path",
>> > > "/opt/spark/work-dir") \
>> > >
>> > >
>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName",
>> > > "nfs100") \
>> > >
>> > >
>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path",
>> > > "/opt/spark/work-dir") \
>> > >
>> > >
>> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> > > "False") \
>> > >
>> > >
>> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
>> > > "False") \
>> > >     .appName("myApp") \
>> > >     .config("spark.sql.repl.eagerEval.enabled", "True") \
>> > >     .config("spark.driver.memory", "4g") \
>> > >     .config("spark.executor.memory", "4g") \
>> > >     .getOrCreate()
>> > > sc = spark.sparkContext
>> > >
>> > > pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
>> > >
>> > >
>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
>> > > operation! Moving all data to a single partition, this can cause
>> serious
>> > > performance degradation.
>> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
>> > > operation! Moving all data to a single partition, this can cause
>> serious
>> > > performance degradation.
>> > > 21/08/30 12:20:37 WARN WindowExec: No Partition Defined for Window
>> > > operation! Moving all data to a single partition, this can cause
>> serious
>> > > performance degradation.
>> > > 21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in stage 25.0
>> (TID
>> > > 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in stage 25.0
>> (TID
>> > > 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in stage 25.0
>> (TID
>> > > 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in stage 25.0
>> (TID
>> > > 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0 failed 4
>> > > times; aborting job
>> > > 21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job
>> > > d98cdc60-bb44-4189-b483-8449fc793658.
>> > > org.apache.spark.SparkException: Job aborted due to stage failure:
>> Task 0
>> > > in stage 25.0 failed 4 times, most recent failure: Lost task 0.3 in
>> stage
>> > > 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs
>> > > failed to create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > Driver stacktrace:
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>> > >         at
>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>> > >         at scala.Option.foreach(Option.scala:407)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>> > >         at
>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>> > >         at
>> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>> > >         at
>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>> > >         at
>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>> > >         at
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>> > >
>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>> > >         at
>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>> > >         at
>> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> > > Method)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>> > >         at
>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> > >         at
>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>> > >         at
>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>> > >         at
>> > >
>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>> > >         at
>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>> > > Caused by: java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> ---------------------------------------------------------------------------
>> > > Py4JJavaError                             Traceback (most recent call
>> last)
>> > > /tmp/ipykernel_80/163396320.py in <module>
>> > > ----> 1
>> pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
>> > >
>> > > /opt/spark/python/pyspark/pandas/frame.py in to_parquet(self, path,
>> mode,
>> > > partition_cols, compression, index_col, **options)
>> > >    4721         if compression is not None:
>> > >    4722             builder.option("compression", compression)
>> > > -> 4723
>>  builder.options(**options).format("parquet").save(path)
>> > >    4724
>> > >    4725     def to_orc(
>> > >
>> > > /opt/spark/python/pyspark/sql/readwriter.py in save(self, path,
>> format,
>> > > mode, partitionBy, **options)
>> > >     738             self._jwrite.save()
>> > >     739         else:
>> > > --> 740             self._jwrite.save(path)
>> > >     741
>> > >     742     @since(1.4)
>> > >
>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
>> > > __call__(self, *args)
>> > >    1307
>> > >    1308         answer = self.gateway_client.send_command(command)
>> > > -> 1309         return_value = get_return_value(
>> > >    1310             answer, self.gateway_client, self.target_id,
>> self.name
>> > > )
>> > >    1311
>> > >
>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
>> > >     109     def deco(*a, **kw):
>> > >     110         try:
>> > > --> 111             return f(*a, **kw)
>> > >     112         except py4j.protocol.Py4JJavaError as e:
>> > >     113             converted = convert_exception(e.java_exception)
>> > >
>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
>> > > get_return_value(answer, gateway_client, target_id, name)
>> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
>> > > gateway_client)
>> > >     325             if answer[1] == REFERENCE_TYPE:
>> > > --> 326                 raise Py4JJavaError(
>> > >     327                     "An error occurred while calling
>> {0}{1}{2}.\n".
>> > >     328                     format(target_id, ".", name), value)
>> > >
>> > > Py4JJavaError: An error occurred while calling o4804.save.
>> > > : org.apache.spark.SparkException: Job aborted.
>> > >         at
>> > >
>> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>> > >         at
>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>> > >         at
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>> > >
>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>> > >         at
>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>> > >         at
>> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> > > Method)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>> > >         at
>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> > >         at
>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>> > >         at
>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>> > >         at
>> > >
>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>> > >         at
>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>> > > Caused by: org.apache.spark.SparkException: Job aborted due to stage
>> > > failure: Task 0 in stage 25.0 failed 4 times, most recent failure:
>> Lost
>> > > task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10):
>> > > java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > Driver stacktrace:
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>> > >         at
>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>> > >         at scala.Option.foreach(Option.scala:407)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>> > >         at
>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>> > >         at
>> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>> > >         at
>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>> > >         ... 41 more
>> > > Caused by: java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > >
>> > >
>> > >
>> > > df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
>> > > mode="overwrite")
>> > >
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in stage 26.0
>> (TID
>> > > 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in stage 26.0
>> (TID
>> > > 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in stage 26.0
>> (TID
>> > > 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in stage 26.0
>> (TID
>> > > 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in stage 26.0
>> (TID
>> > > 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.1 in stage 26.0
>> (TID
>> > > 9553) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303560179987922074406_0026_m_000001_9553
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.2 in stage 26.0
>> (TID
>> > > 9556) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906307237274992181823763_0026_m_000002_9556
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.1 in stage 26.0
>> (TID
>> > > 9554) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306864123848918470508_0026_m_000004_9554
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.1 in stage 26.0
>> (TID
>> > > 9555) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302601970797047480301_0026_m_000010_9555
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.0 in stage 26.0
>> (TID
>> > > 9541) (10.42.192.9 executor 8): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304154332762277555982_0026_m_000000_9541
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.0 in stage 26.0
>> (TID
>> > > 9548) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630694656629969727231_0026_m_000007_9548
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.2 in stage 26.0
>> (TID
>> > > 9559) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303848774759656984701_0026_m_000004_9559
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.3 in stage 26.0
>> (TID
>> > > 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 ERROR TaskSetManager: Task 2 in stage 26.0 failed 4
>> > > times; aborting job
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 1.2 in stage 26.0
>> (TID
>> > > 9557) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302130961573080351978_0026_m_000001_9557
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 8.0 in stage 26.0
>> (TID
>> > > 9549) (10.42.0.17 executor 6): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306776907150898092479_0026_m_000008_9549
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 5.0 in stage 26.0
>> (TID
>> > > 9546) (10.42.96.9 executor 7): java.io.IOException: Mkdirs failed to
>> create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906301725332653584503335_0026_m_000005_9546
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 11.0 in stage 26.0
>> (TID
>> > > 9561) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302182889441465469285_0026_m_000011_9561
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.1 in stage 26.0
>> (TID
>> > > 9563) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 ERROR FileFormatWriter: Aborting job
>> > > 115cf3ce-5a9b-4274-8752-b6ead281f104.
>> > > org.apache.spark.SparkException: Job aborted due to stage failure:
>> Task 2
>> > > in stage 26.0 failed 4 times, most recent failure: Lost task 2.3 in
>> stage
>> > > 26.0 (TID 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
>> > > failed to create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > Driver stacktrace:
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>> > >         at
>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>> > >         at scala.Option.foreach(Option.scala:407)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>> > >         at
>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>> > >         at
>> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>> > >         at
>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>> > >         at
>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>> > >         at
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>> > >
>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>> > >         at
>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
>> > >         at
>> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> > > Method)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>> > >         at
>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> > >         at
>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>> > >         at
>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>> > >         at
>> > >
>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>> > >         at
>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>> > > Caused by: java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.1 in stage 26.0
>> (TID
>> > > 9562) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304432517406660432032_0026_m_000007_9562
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 9.0 in stage 26.0
>> (TID
>> > > 9550) (10.42.128.10 executor 9): java.io.IOException: Mkdirs failed to
>> > > create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305148471752983202631_0026_m_000009_9550
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 14.0 in stage 26.0
>> (TID
>> > > 9566) (10.42.96.9 executor 7): TaskKilled (Stage cancelled)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 6.0 in stage 26.0
>> (TID
>> > > 9547) (10.42.32.12 executor 11): TaskKilled (Stage cancelled)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 13.0 in stage 26.0
>> (TID
>> > > 9565) (10.42.0.17 executor 6): TaskKilled (Stage cancelled)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 12.0 in stage 26.0
>> (TID
>> > > 9564) (10.42.0.12 executor 3): TaskKilled (Stage cancelled)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.2 in stage 26.0
>> (TID
>> > > 9560) (10.42.192.9 executor 8): TaskKilled (Stage cancelled)
>> > > [Stage 26:>                                                       (0
>> + 1)
>> > > / 132]
>> > >
>> ---------------------------------------------------------------------------
>> > > Py4JJavaError                             Traceback (most recent call
>> last)
>> > > /tmp/ipykernel_80/610855484.py in <module>
>> > > ----> 1
>> df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
>> > > mode="overwrite")
>> > >
>> > > /opt/spark/python/pyspark/sql/readwriter.py in parquet(self, path,
>> mode,
>> > > partitionBy, compression)
>> > >     883             self.partitionBy(partitionBy)
>> > >     884         self._set_opts(compression=compression)
>> > > --> 885         self._jwrite.parquet(path)
>> > >     886
>> > >     887     def text(self, path, compression=None, lineSep=None):
>> > >
>> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
>> > > __call__(self, *args)
>> > >    1307
>> > >    1308         answer = self.gateway_client.send_command(command)
>> > > -> 1309         return_value = get_return_value(
>> > >    1310             answer, self.gateway_client, self.target_id,
>> self.name
>> > > )
>> > >    1311
>> > >
>> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
>> > >     109     def deco(*a, **kw):
>> > >     110         try:
>> > > --> 111             return f(*a, **kw)
>> > >     112         except py4j.protocol.Py4JJavaError as e:
>> > >     113             converted = convert_exception(e.java_exception)
>> > >
>> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
>> > > get_return_value(answer, gateway_client, target_id, name)
>> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
>> > > gateway_client)
>> > >     325             if answer[1] == REFERENCE_TYPE:
>> > > --> 326                 raise Py4JJavaError(
>> > >     327                     "An error occurred while calling
>> {0}{1}{2}.\n".
>> > >     328                     format(target_id, ".", name), value)
>> > >
>> > > Py4JJavaError: An error occurred while calling o15435.parquet.
>> > > : org.apache.spark.SparkException: Job aborted.
>> > >         at
>> > >
>> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>> > >         at
>> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>> > >         at
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
>> > >
>> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>> > >         at
>> > >
>> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>> > >         at
>> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>> > >         at
>> > >
>> org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
>> > >         at
>> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> > > Method)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> > >         at
>> > >
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>> > >         at
>> py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> > >         at
>> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> > >         at py4j.Gateway.invoke(Gateway.java:282)
>> > >         at
>> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>> > >         at
>> > >
>> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>> > >         at
>> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>> > >         at java.base/java.lang.Thread.run(Thread.java:829)
>> > > Caused by: org.apache.spark.SparkException: Job aborted due to stage
>> > > failure: Task 2 in stage 26.0 failed 4 times, most recent failure:
>> Lost
>> > > task 2.3 in stage 26.0 (TID 9558) (10.42.240.3 executor 1):
>> > > java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > >
>> > > Driver stacktrace:
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>> > >         at
>> > >
>> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>> > >         at
>> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>> > >         at scala.Option.foreach(Option.scala:407)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>> > >         at
>> > >
>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>> > >         at
>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>> > >         at
>> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>> > >         at
>> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>> > >         ... 42 more
>> > > Caused by: java.io.IOException: Mkdirs failed to create
>> > >
>> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
>> > > (exists=false, cwd=file:/opt/spark/work-dir)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>> > >         at
>> > >
>> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>> > >         at
>> org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>> > >         at
>> > >
>> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>> > >         at
>> > >
>> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>> > >         at
>> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>> > >         at
>> > >
>> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>> > >         at
>> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>> > >         at
>> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
>> Source)
>> > >         at
>> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
>> Source)
>> > >         at java.base/java.lang.Thread.run(Unknown Source)
>> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 3.0 in stage 26.0
>> (TID
>> > > 9544) (10.42.0.18 executor 12): TaskKilled (Stage cancelled)
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >
>> > > ---------------------------------------------------------------------
>> > > To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>> > >
>> > >
>> >
>>
>> ---------------------------------------------------------------------
>> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>>
>>

-- 
Twitter: https://twitter.com/holdenkarau
Books (Learning Spark, High Performance Spark, etc.):
https://amzn.to/2MaRAG9  <https://amzn.to/2MaRAG9>
YouTube Live Streams: https://www.youtube.com/user/holdenkarau

Re: Can’t write to PVC in K8S

Posted by Mich Talebzadeh <mi...@gmail.com>.
I am not familiar with  jupyterlab  so cannot comment on that.

However, once your parquet file is written to the work-dir, how are you
going to utilise it?

HTH




   view my Linkedin profile
<https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>



*Disclaimer:* Use it at your own risk. Any and all responsibility for any
loss, damage or destruction of data or any other property which may arise
from relying on this email's technical content is explicitly disclaimed.
The author will in no case be liable for any monetary damages arising from
such loss, damage or destruction.




On Mon, 30 Aug 2021 at 22:05, Bjørn Jørgensen <bj...@gmail.com>
wrote:

> ok, so when I use spark on k8s I can only save files to s3 buckets or to a
> database?
>
> Note my setup, its spark with jupyterlab on top on k8s.
>
> What are those for if I cant write files from spark in k8s to disk?
>
> "spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> "False"
> "spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> "False"
>
> On 2021/08/30 20:50:22, Mich Talebzadeh <mi...@gmail.com>
> wrote:
> > Hi,
> >
> > You are trying to write to work-dir inside the docker and create
> > sub-directories:
> >
> > The error you are getting is this
> >
> > Mkdirs failed to create
> >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >
> > That directory /work-dir is not recognised as a valid directory
> > for storage. It is not in HDFS or HCFS format
> >
> >
> > From Spark you can write to a bucket outside as a permanent storage.
> >
> > HTH
> >
> >
> >    view my Linkedin profile
> > <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> >
> >
> >
> > *Disclaimer:* Use it at your own risk. Any and all responsibility for any
> > loss, damage or destruction of data or any other property which may arise
> > from relying on this email's technical content is explicitly disclaimed.
> > The author will in no case be liable for any monetary damages arising
> from
> > such loss, damage or destruction.
> >
> >
> >
> >
> > On Mon, 30 Aug 2021 at 14:11, Bjørn Jørgensen <bj...@gmail.com>
> > wrote:
> >
> > > Hi, I have built and running spark on k8s. A link to my repo
> > > https://github.com/bjornjorgensen/jlpyk8s
> > >
> > > Everything seems to be running fine, but I can’t save to PVC.
> > > If I convert the dataframe to pandas, then I can save it.
> > >
> > >
> > >
> > > from pyspark.sql import SparkSession
> > > spark = SparkSession.builder \
> > >     .master("k8s://https://kubernetes.default.svc.cluster.local:443")
> \
> > >     .config("spark.kubernetes.container.image",
> > > "bjornjorgensen/spark-py:v3.2-290821") \
> > >     .config("spark.kubernetes.authenticate.caCertFile",
> "/var/run/secrets/
> > > kubernetes.io/serviceaccount/ca.crt") \
> > >     .config("spark.kubernetes.authenticate.oauthTokenFile",
> > > "/var/run/secrets/kubernetes.io/serviceaccount/token") \
> > >     .config("spark.kubernetes.authenticate.driver.serviceAccountName",
> > > "my-pyspark-notebook") \
> > >     .config("spark.executor.instances", "10") \
> > >     .config("spark.driver.host",
> > > "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
> > >     .config("spark.driver.port", "29413") \
> > >
> > >
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName",
> > > "nfs100") \
> > >
> > >
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path",
> > > "/opt/spark/work-dir") \
> > >
> > >
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName",
> > > "nfs100") \
> > >
> > >
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path",
> > > "/opt/spark/work-dir") \
> > >
> > >
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > > "False") \
> > >
> > >
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > > "False") \
> > >     .appName("myApp") \
> > >     .config("spark.sql.repl.eagerEval.enabled", "True") \
> > >     .config("spark.driver.memory", "4g") \
> > >     .config("spark.executor.memory", "4g") \
> > >     .getOrCreate()
> > > sc = spark.sparkContext
> > >
> > > pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> > >
> > >
> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
> > > operation! Moving all data to a single partition, this can cause
> serious
> > > performance degradation.
> > > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
> > > operation! Moving all data to a single partition, this can cause
> serious
> > > performance degradation.
> > > 21/08/30 12:20:37 WARN WindowExec: No Partition Defined for Window
> > > operation! Moving all data to a single partition, this can cause
> serious
> > > performance degradation.
> > > 21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in stage 25.0 (TID
> > > 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs failed to
> create
> > >
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in stage 25.0 (TID
> > > 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in stage 25.0 (TID
> > > 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in stage 25.0 (TID
> > > 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0 failed 4
> > > times; aborting job
> > > 21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job
> > > d98cdc60-bb44-4189-b483-8449fc793658.
> > > org.apache.spark.SparkException: Job aborted due to stage failure:
> Task 0
> > > in stage 25.0 failed 4 times, most recent failure: Lost task 0.3 in
> stage
> > > 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs
> > > failed to create
> > >
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > Driver stacktrace:
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> > >         at
> > >
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> > >         at
> > >
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> > >         at
> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> > >         at scala.Option.foreach(Option.scala:407)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> > >         at
> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> > >         at
> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> > >         at
> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> > >         at
> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> > >         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > >
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> > >         at
> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> > >         at
> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > > Method)
> > >         at
> > >
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> > >         at
> > >
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> > >         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> > >         at
> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> > >         at py4j.Gateway.invoke(Gateway.java:282)
> > >         at
> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> > >         at
> > >
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> > >         at
> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> > > Caused by: java.io.IOException: Mkdirs failed to create
> > >
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> ---------------------------------------------------------------------------
> > > Py4JJavaError                             Traceback (most recent call
> last)
> > > /tmp/ipykernel_80/163396320.py in <module>
> > > ----> 1 pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> > >
> > > /opt/spark/python/pyspark/pandas/frame.py in to_parquet(self, path,
> mode,
> > > partition_cols, compression, index_col, **options)
> > >    4721         if compression is not None:
> > >    4722             builder.option("compression", compression)
> > > -> 4723         builder.options(**options).format("parquet").save(path)
> > >    4724
> > >    4725     def to_orc(
> > >
> > > /opt/spark/python/pyspark/sql/readwriter.py in save(self, path, format,
> > > mode, partitionBy, **options)
> > >     738             self._jwrite.save()
> > >     739         else:
> > > --> 740             self._jwrite.save(path)
> > >     741
> > >     742     @since(1.4)
> > >
> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
> > > __call__(self, *args)
> > >    1307
> > >    1308         answer = self.gateway_client.send_command(command)
> > > -> 1309         return_value = get_return_value(
> > >    1310             answer, self.gateway_client, self.target_id,
> self.name
> > > )
> > >    1311
> > >
> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
> > >     109     def deco(*a, **kw):
> > >     110         try:
> > > --> 111             return f(*a, **kw)
> > >     112         except py4j.protocol.Py4JJavaError as e:
> > >     113             converted = convert_exception(e.java_exception)
> > >
> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
> > > get_return_value(answer, gateway_client, target_id, name)
> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
> > > gateway_client)
> > >     325             if answer[1] == REFERENCE_TYPE:
> > > --> 326                 raise Py4JJavaError(
> > >     327                     "An error occurred while calling
> {0}{1}{2}.\n".
> > >     328                     format(target_id, ".", name), value)
> > >
> > > Py4JJavaError: An error occurred while calling o4804.save.
> > > : org.apache.spark.SparkException: Job aborted.
> > >         at
> > >
> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> > >         at
> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> > >         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > >
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> > >         at
> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> > >         at
> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > > Method)
> > >         at
> > >
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> > >         at
> > >
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> > >         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> > >         at
> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> > >         at py4j.Gateway.invoke(Gateway.java:282)
> > >         at
> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> > >         at
> > >
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> > >         at
> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> > > Caused by: org.apache.spark.SparkException: Job aborted due to stage
> > > failure: Task 0 in stage 25.0 failed 4 times, most recent failure: Lost
> > > task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10):
> > > java.io.IOException: Mkdirs failed to create
> > >
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > Driver stacktrace:
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> > >         at
> > >
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> > >         at
> > >
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> > >         at
> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> > >         at scala.Option.foreach(Option.scala:407)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> > >         at
> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> > >         at
> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> > >         at
> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> > >         ... 41 more
> > > Caused by: java.io.IOException: Mkdirs failed to create
> > >
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > >
> > >
> > >
> > > df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
> > > mode="overwrite")
> > >
> > >
> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in stage 26.0 (TID
> > > 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in stage 26.0 (TID
> > > 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in stage 26.0 (TID
> > > 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
> create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in stage 26.0
> (TID
> > > 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in stage 26.0 (TID
> > > 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.1 in stage 26.0 (TID
> > > 9553) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
> create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303560179987922074406_0026_m_000001_9553
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.2 in stage 26.0 (TID
> > > 9556) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
> create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906307237274992181823763_0026_m_000002_9556
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.1 in stage 26.0 (TID
> > > 9554) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306864123848918470508_0026_m_000004_9554
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.1 in stage 26.0
> (TID
> > > 9555) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302601970797047480301_0026_m_000010_9555
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.0 in stage 26.0 (TID
> > > 9541) (10.42.192.9 executor 8): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304154332762277555982_0026_m_000000_9541
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.0 in stage 26.0 (TID
> > > 9548) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630694656629969727231_0026_m_000007_9548
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.2 in stage 26.0 (TID
> > > 9559) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303848774759656984701_0026_m_000004_9559
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.3 in stage 26.0 (TID
> > > 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 ERROR TaskSetManager: Task 2 in stage 26.0 failed 4
> > > times; aborting job
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 1.2 in stage 26.0 (TID
> > > 9557) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to
> create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302130961573080351978_0026_m_000001_9557
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 8.0 in stage 26.0 (TID
> > > 9549) (10.42.0.17 executor 6): java.io.IOException: Mkdirs failed to
> create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306776907150898092479_0026_m_000008_9549
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 5.0 in stage 26.0 (TID
> > > 9546) (10.42.96.9 executor 7): java.io.IOException: Mkdirs failed to
> create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906301725332653584503335_0026_m_000005_9546
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 11.0 in stage 26.0
> (TID
> > > 9561) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302182889441465469285_0026_m_000011_9561
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.1 in stage 26.0 (TID
> > > 9563) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 ERROR FileFormatWriter: Aborting job
> > > 115cf3ce-5a9b-4274-8752-b6ead281f104.
> > > org.apache.spark.SparkException: Job aborted due to stage failure:
> Task 2
> > > in stage 26.0 failed 4 times, most recent failure: Lost task 2.3 in
> stage
> > > 26.0 (TID 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
> > > failed to create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > Driver stacktrace:
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> > >         at
> > >
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> > >         at
> > >
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> > >         at
> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> > >         at scala.Option.foreach(Option.scala:407)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> > >         at
> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> > >         at
> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> > >         at
> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> > >         at
> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> > >         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > >
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> > >         at
> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> > >         at
> > > org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
> > >         at
> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > > Method)
> > >         at
> > >
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> > >         at
> > >
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> > >         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> > >         at
> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> > >         at py4j.Gateway.invoke(Gateway.java:282)
> > >         at
> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> > >         at
> > >
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> > >         at
> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> > > Caused by: java.io.IOException: Mkdirs failed to create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.1 in stage 26.0 (TID
> > > 9562) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304432517406660432032_0026_m_000007_9562
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 9.0 in stage 26.0 (TID
> > > 9550) (10.42.128.10 executor 9): java.io.IOException: Mkdirs failed to
> > > create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305148471752983202631_0026_m_000009_9550
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 14.0 in stage 26.0
> (TID
> > > 9566) (10.42.96.9 executor 7): TaskKilled (Stage cancelled)
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 6.0 in stage 26.0 (TID
> > > 9547) (10.42.32.12 executor 11): TaskKilled (Stage cancelled)
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 13.0 in stage 26.0
> (TID
> > > 9565) (10.42.0.17 executor 6): TaskKilled (Stage cancelled)
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 12.0 in stage 26.0
> (TID
> > > 9564) (10.42.0.12 executor 3): TaskKilled (Stage cancelled)
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.2 in stage 26.0
> (TID
> > > 9560) (10.42.192.9 executor 8): TaskKilled (Stage cancelled)
> > > [Stage 26:>                                                       (0 +
> 1)
> > > / 132]
> > >
> ---------------------------------------------------------------------------
> > > Py4JJavaError                             Traceback (most recent call
> last)
> > > /tmp/ipykernel_80/610855484.py in <module>
> > > ----> 1 df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
> > > mode="overwrite")
> > >
> > > /opt/spark/python/pyspark/sql/readwriter.py in parquet(self, path,
> mode,
> > > partitionBy, compression)
> > >     883             self.partitionBy(partitionBy)
> > >     884         self._set_opts(compression=compression)
> > > --> 885         self._jwrite.parquet(path)
> > >     886
> > >     887     def text(self, path, compression=None, lineSep=None):
> > >
> > > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
> > > __call__(self, *args)
> > >    1307
> > >    1308         answer = self.gateway_client.send_command(command)
> > > -> 1309         return_value = get_return_value(
> > >    1310             answer, self.gateway_client, self.target_id,
> self.name
> > > )
> > >    1311
> > >
> > > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
> > >     109     def deco(*a, **kw):
> > >     110         try:
> > > --> 111             return f(*a, **kw)
> > >     112         except py4j.protocol.Py4JJavaError as e:
> > >     113             converted = convert_exception(e.java_exception)
> > >
> > > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
> > > get_return_value(answer, gateway_client, target_id, name)
> > >     324             value = OUTPUT_CONVERTER[type](answer[2:],
> > > gateway_client)
> > >     325             if answer[1] == REFERENCE_TYPE:
> > > --> 326                 raise Py4JJavaError(
> > >     327                     "An error occurred while calling
> {0}{1}{2}.\n".
> > >     328                     format(target_id, ".", name), value)
> > >
> > > Py4JJavaError: An error occurred while calling o15435.parquet.
> > > : org.apache.spark.SparkException: Job aborted.
> > >         at
> > >
> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> > >         at
> > >
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> > >         at
> > > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> > >         at
> > >
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> > >         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > >
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> > >         at
> > >
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> > >         at
> > >
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> > >         at
> > >
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> > >         at
> > > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> > >         at
> > > org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
> > >         at
> > > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > > Method)
> > >         at
> > >
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> > >         at
> > >
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> > >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> > >         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> > >         at
> > > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> > >         at py4j.Gateway.invoke(Gateway.java:282)
> > >         at
> > > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> > >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> > >         at
> > >
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> > >         at
> py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> > >         at java.base/java.lang.Thread.run(Thread.java:829)
> > > Caused by: org.apache.spark.SparkException: Job aborted due to stage
> > > failure: Task 2 in stage 26.0 failed 4 times, most recent failure: Lost
> > > task 2.3 in stage 26.0 (TID 9558) (10.42.240.3 executor 1):
> > > java.io.IOException: Mkdirs failed to create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > >
> > > Driver stacktrace:
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> > >         at
> > >
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> > >         at
> > >
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> > >         at
> > > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> > >         at scala.Option.foreach(Option.scala:407)
> > >         at
> > >
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> > >         at
> > >
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> > >         at
> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> > >         at
> > > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> > >         at
> org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> > >         ... 42 more
> > > Caused by: java.io.IOException: Mkdirs failed to create
> > >
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> > > (exists=false, cwd=file:/opt/spark/work-dir)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> > >         at
> > >
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> > >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> > >         at
> > >
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> > >         at
> > >
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> > >         at
> > >
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> > >         at
> > > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> > >         at
> > >
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> > >         at
> > > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> > >         at
> > > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown
> Source)
> > >         at
> > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
> Source)
> > >         at java.base/java.lang.Thread.run(Unknown Source)
> > > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 3.0 in stage 26.0 (TID
> > > 9544) (10.42.0.18 executor 12): TaskKilled (Stage cancelled)
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > > ---------------------------------------------------------------------
> > > To unsubscribe e-mail: user-unsubscribe@spark.apache.org
> > >
> > >
> >
>
> ---------------------------------------------------------------------
> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>
>

Re: Can���t write to PVC in K8S

Posted by Bj��rn J��rgensen <bj...@gmail.com>.
ok, so when I use spark on k8s I can only save files to s3 buckets or to a database? 

Note my setup, its spark with jupyterlab on top on k8s. 

What are those for if I cant write files from spark in k8s to disk? 

"spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly", "False"
"spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly", "False"

On 2021/08/30 20:50:22, Mich Talebzadeh <mi...@gmail.com> wrote: 
> Hi,
> 
> You are trying to write to work-dir inside the docker and create
> sub-directories:
> 
> The error you are getting is this
> 
> Mkdirs failed to create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
> (exists=false, cwd=file:/opt/spark/work-dir)
> 
> That directory /work-dir is not recognised as a valid directory
> for storage. It is not in HDFS or HCFS format
> 
> 
> From Spark you can write to a bucket outside as a permanent storage.
> 
> HTH
> 
> 
>    view my Linkedin profile
> <https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
> 
> 
> 
> *Disclaimer:* Use it at your own risk. Any and all responsibility for any
> loss, damage or destruction of data or any other property which may arise
> from relying on this email's technical content is explicitly disclaimed.
> The author will in no case be liable for any monetary damages arising from
> such loss, damage or destruction.
> 
> 
> 
> 
> On Mon, 30 Aug 2021 at 14:11, Bjørn Jørgensen <bj...@gmail.com>
> wrote:
> 
> > Hi, I have built and running spark on k8s. A link to my repo
> > https://github.com/bjornjorgensen/jlpyk8s
> >
> > Everything seems to be running fine, but I can’t save to PVC.
> > If I convert the dataframe to pandas, then I can save it.
> >
> >
> >
> > from pyspark.sql import SparkSession
> > spark = SparkSession.builder \
> >     .master("k8s://https://kubernetes.default.svc.cluster.local:443") \
> >     .config("spark.kubernetes.container.image",
> > "bjornjorgensen/spark-py:v3.2-290821") \
> >     .config("spark.kubernetes.authenticate.caCertFile", "/var/run/secrets/
> > kubernetes.io/serviceaccount/ca.crt") \
> >     .config("spark.kubernetes.authenticate.oauthTokenFile",
> > "/var/run/secrets/kubernetes.io/serviceaccount/token") \
> >     .config("spark.kubernetes.authenticate.driver.serviceAccountName",
> > "my-pyspark-notebook") \
> >     .config("spark.executor.instances", "10") \
> >     .config("spark.driver.host",
> > "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
> >     .config("spark.driver.port", "29413") \
> >
> > .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName",
> > "nfs100") \
> >
> > .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path",
> > "/opt/spark/work-dir") \
> >
> > .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName",
> > "nfs100") \
> >
> > .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path",
> > "/opt/spark/work-dir") \
> >
> > .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > "False") \
> >
> > .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> > "False") \
> >     .appName("myApp") \
> >     .config("spark.sql.repl.eagerEval.enabled", "True") \
> >     .config("spark.driver.memory", "4g") \
> >     .config("spark.executor.memory", "4g") \
> >     .getOrCreate()
> > sc = spark.sparkContext
> >
> > pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> >
> >
> > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
> > operation! Moving all data to a single partition, this can cause serious
> > performance degradation.
> > 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
> > operation! Moving all data to a single partition, this can cause serious
> > performance degradation.
> > 21/08/30 12:20:37 WARN WindowExec: No Partition Defined for Window
> > operation! Moving all data to a single partition, this can cause serious
> > performance degradation.
> > 21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in stage 25.0 (TID
> > 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in stage 25.0 (TID
> > 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in stage 25.0 (TID
> > 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in stage 25.0 (TID
> > 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0 failed 4
> > times; aborting job
> > 21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job
> > d98cdc60-bb44-4189-b483-8449fc793658.
> > org.apache.spark.SparkException: Job aborted due to stage failure: Task 0
> > in stage 25.0 failed 4 times, most recent failure: Lost task 0.3 in stage
> > 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs
> > failed to create
> > file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > Driver stacktrace:
> >         at
> > org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> >         at
> > scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> >         at
> > scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> >         at
> > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> >         at scala.Option.foreach(Option.scala:407)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> >         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> >         at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> >         at
> > org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> >         at
> > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> >         at
> > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> >         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> >         at
> > org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> >         at
> > org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> >         at
> > org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> >         at
> > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> >         at
> > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > Method)
> >         at
> > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >         at
> > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> >         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> >         at
> > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> >         at py4j.Gateway.invoke(Gateway.java:282)
> >         at
> > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> >         at
> > py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> >         at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> >         at java.base/java.lang.Thread.run(Thread.java:829)
> > Caused by: java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> > ---------------------------------------------------------------------------
> > Py4JJavaError                             Traceback (most recent call last)
> > /tmp/ipykernel_80/163396320.py in <module>
> > ----> 1 pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
> >
> > /opt/spark/python/pyspark/pandas/frame.py in to_parquet(self, path, mode,
> > partition_cols, compression, index_col, **options)
> >    4721         if compression is not None:
> >    4722             builder.option("compression", compression)
> > -> 4723         builder.options(**options).format("parquet").save(path)
> >    4724
> >    4725     def to_orc(
> >
> > /opt/spark/python/pyspark/sql/readwriter.py in save(self, path, format,
> > mode, partitionBy, **options)
> >     738             self._jwrite.save()
> >     739         else:
> > --> 740             self._jwrite.save(path)
> >     741
> >     742     @since(1.4)
> >
> > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
> > __call__(self, *args)
> >    1307
> >    1308         answer = self.gateway_client.send_command(command)
> > -> 1309         return_value = get_return_value(
> >    1310             answer, self.gateway_client, self.target_id, self.name
> > )
> >    1311
> >
> > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
> >     109     def deco(*a, **kw):
> >     110         try:
> > --> 111             return f(*a, **kw)
> >     112         except py4j.protocol.Py4JJavaError as e:
> >     113             converted = convert_exception(e.java_exception)
> >
> > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
> > get_return_value(answer, gateway_client, target_id, name)
> >     324             value = OUTPUT_CONVERTER[type](answer[2:],
> > gateway_client)
> >     325             if answer[1] == REFERENCE_TYPE:
> > --> 326                 raise Py4JJavaError(
> >     327                     "An error occurred while calling {0}{1}{2}.\n".
> >     328                     format(target_id, ".", name), value)
> >
> > Py4JJavaError: An error occurred while calling o4804.save.
> > : org.apache.spark.SparkException: Job aborted.
> >         at
> > org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
> >         at
> > org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> >         at
> > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> >         at
> > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> >         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> >         at
> > org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> >         at
> > org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> >         at
> > org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> >         at
> > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> >         at
> > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > Method)
> >         at
> > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >         at
> > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> >         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> >         at
> > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> >         at py4j.Gateway.invoke(Gateway.java:282)
> >         at
> > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> >         at
> > py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> >         at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> >         at java.base/java.lang.Thread.run(Thread.java:829)
> > Caused by: org.apache.spark.SparkException: Job aborted due to stage
> > failure: Task 0 in stage 25.0 failed 4 times, most recent failure: Lost
> > task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10):
> > java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > Driver stacktrace:
> >         at
> > org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> >         at
> > scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> >         at
> > scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> >         at
> > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> >         at scala.Option.foreach(Option.scala:407)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> >         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> >         at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> >         ... 41 more
> > Caused by: java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> >
> >
> >
> > df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
> > mode="overwrite")
> >
> >
> > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in stage 26.0 (TID
> > 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in stage 26.0 (TID
> > 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in stage 26.0 (TID
> > 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in stage 26.0 (TID
> > 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in stage 26.0 (TID
> > 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.1 in stage 26.0 (TID
> > 9553) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303560179987922074406_0026_m_000001_9553
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.2 in stage 26.0 (TID
> > 9556) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906307237274992181823763_0026_m_000002_9556
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.1 in stage 26.0 (TID
> > 9554) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306864123848918470508_0026_m_000004_9554
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.1 in stage 26.0 (TID
> > 9555) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302601970797047480301_0026_m_000010_9555
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.0 in stage 26.0 (TID
> > 9541) (10.42.192.9 executor 8): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304154332762277555982_0026_m_000000_9541
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.0 in stage 26.0 (TID
> > 9548) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630694656629969727231_0026_m_000007_9548
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.2 in stage 26.0 (TID
> > 9559) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303848774759656984701_0026_m_000004_9559
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.3 in stage 26.0 (TID
> > 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 ERROR TaskSetManager: Task 2 in stage 26.0 failed 4
> > times; aborting job
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 1.2 in stage 26.0 (TID
> > 9557) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302130961573080351978_0026_m_000001_9557
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 8.0 in stage 26.0 (TID
> > 9549) (10.42.0.17 executor 6): java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306776907150898092479_0026_m_000008_9549
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 5.0 in stage 26.0 (TID
> > 9546) (10.42.96.9 executor 7): java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906301725332653584503335_0026_m_000005_9546
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 11.0 in stage 26.0 (TID
> > 9561) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302182889441465469285_0026_m_000011_9561
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.1 in stage 26.0 (TID
> > 9563) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 ERROR FileFormatWriter: Aborting job
> > 115cf3ce-5a9b-4274-8752-b6ead281f104.
> > org.apache.spark.SparkException: Job aborted due to stage failure: Task 2
> > in stage 26.0 failed 4 times, most recent failure: Lost task 2.3 in stage
> > 26.0 (TID 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
> > failed to create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > Driver stacktrace:
> >         at
> > org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> >         at
> > scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> >         at
> > scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> >         at
> > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> >         at scala.Option.foreach(Option.scala:407)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> >         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> >         at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> >         at
> > org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> >         at
> > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> >         at
> > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> >         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> >         at
> > org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> >         at
> > org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> >         at
> > org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> >         at
> > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> >         at
> > org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
> >         at
> > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > Method)
> >         at
> > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >         at
> > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> >         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> >         at
> > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> >         at py4j.Gateway.invoke(Gateway.java:282)
> >         at
> > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> >         at
> > py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> >         at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> >         at java.base/java.lang.Thread.run(Thread.java:829)
> > Caused by: java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.1 in stage 26.0 (TID
> > 9562) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304432517406660432032_0026_m_000007_9562
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 9.0 in stage 26.0 (TID
> > 9550) (10.42.128.10 executor 9): java.io.IOException: Mkdirs failed to
> > create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305148471752983202631_0026_m_000009_9550
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 14.0 in stage 26.0 (TID
> > 9566) (10.42.96.9 executor 7): TaskKilled (Stage cancelled)
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 6.0 in stage 26.0 (TID
> > 9547) (10.42.32.12 executor 11): TaskKilled (Stage cancelled)
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 13.0 in stage 26.0 (TID
> > 9565) (10.42.0.17 executor 6): TaskKilled (Stage cancelled)
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 12.0 in stage 26.0 (TID
> > 9564) (10.42.0.12 executor 3): TaskKilled (Stage cancelled)
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.2 in stage 26.0 (TID
> > 9560) (10.42.192.9 executor 8): TaskKilled (Stage cancelled)
> > [Stage 26:>                                                       (0 + 1)
> > / 132]
> > ---------------------------------------------------------------------------
> > Py4JJavaError                             Traceback (most recent call last)
> > /tmp/ipykernel_80/610855484.py in <module>
> > ----> 1 df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
> > mode="overwrite")
> >
> > /opt/spark/python/pyspark/sql/readwriter.py in parquet(self, path, mode,
> > partitionBy, compression)
> >     883             self.partitionBy(partitionBy)
> >     884         self._set_opts(compression=compression)
> > --> 885         self._jwrite.parquet(path)
> >     886
> >     887     def text(self, path, compression=None, lineSep=None):
> >
> > /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
> > __call__(self, *args)
> >    1307
> >    1308         answer = self.gateway_client.send_command(command)
> > -> 1309         return_value = get_return_value(
> >    1310             answer, self.gateway_client, self.target_id, self.name
> > )
> >    1311
> >
> > /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
> >     109     def deco(*a, **kw):
> >     110         try:
> > --> 111             return f(*a, **kw)
> >     112         except py4j.protocol.Py4JJavaError as e:
> >     113             converted = convert_exception(e.java_exception)
> >
> > /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
> > get_return_value(answer, gateway_client, target_id, name)
> >     324             value = OUTPUT_CONVERTER[type](answer[2:],
> > gateway_client)
> >     325             if answer[1] == REFERENCE_TYPE:
> > --> 326                 raise Py4JJavaError(
> >     327                     "An error occurred while calling {0}{1}{2}.\n".
> >     328                     format(target_id, ".", name), value)
> >
> > Py4JJavaError: An error occurred while calling o15435.parquet.
> > : org.apache.spark.SparkException: Job aborted.
> >         at
> > org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
> >         at
> > org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
> >         at
> > org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> >         at
> > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> >         at
> > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> >         at
> > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> >         at
> > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> >         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> > $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
> >         at
> > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
> >         at
> > org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
> >         at
> > org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
> >         at
> > org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
> >         at
> > org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
> >         at
> > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> >         at
> > org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
> >         at
> > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> > Method)
> >         at
> > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >         at
> > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> >         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> >         at
> > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> >         at py4j.Gateway.invoke(Gateway.java:282)
> >         at
> > py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> >         at py4j.commands.CallCommand.execute(CallCommand.java:79)
> >         at
> > py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
> >         at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
> >         at java.base/java.lang.Thread.run(Thread.java:829)
> > Caused by: org.apache.spark.SparkException: Job aborted due to stage
> > failure: Task 2 in stage 26.0 failed 4 times, most recent failure: Lost
> > task 2.3 in stage 26.0 (TID 9558) (10.42.240.3 executor 1):
> > java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> >
> > Driver stacktrace:
> >         at
> > org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
> >         at
> > scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> >         at
> > scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> >         at
> > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
> >         at scala.Option.foreach(Option.scala:407)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
> >         at
> > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
> >         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> >         at
> > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
> >         at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
> >         ... 42 more
> > Caused by: java.io.IOException: Mkdirs failed to create
> > file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> > (exists=false, cwd=file:/opt/spark/work-dir)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
> >         at
> > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
> >         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> >         at
> > org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
> >         at
> > org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
> >         at
> > org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
> >         at
> > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
> >         at
> > org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
> >         at
> > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
> >         at
> > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> >         at org.apache.spark.scheduler.Task.run(Task.scala:131)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> >         at
> > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> >         at
> > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
> >         at
> > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
> >         at java.base/java.lang.Thread.run(Unknown Source)
> > 21/08/29 19:06:31 WARN TaskSetManager: Lost task 3.0 in stage 26.0 (TID
> > 9544) (10.42.0.18 executor 12): TaskKilled (Stage cancelled)
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> > ---------------------------------------------------------------------
> > To unsubscribe e-mail: user-unsubscribe@spark.apache.org
> >
> >
> 

---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscribe@spark.apache.org


Re: Can’t write to PVC in K8S

Posted by Mich Talebzadeh <mi...@gmail.com>.
Hi,

You are trying to write to work-dir inside the docker and create
sub-directories:

The error you are getting is this

Mkdirs failed to create
file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
(exists=false, cwd=file:/opt/spark/work-dir)

That directory /work-dir is not recognised as a valid directory
for storage. It is not in HDFS or HCFS format


From Spark you can write to a bucket outside as a permanent storage.

HTH


   view my Linkedin profile
<https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>



*Disclaimer:* Use it at your own risk. Any and all responsibility for any
loss, damage or destruction of data or any other property which may arise
from relying on this email's technical content is explicitly disclaimed.
The author will in no case be liable for any monetary damages arising from
such loss, damage or destruction.




On Mon, 30 Aug 2021 at 14:11, Bjørn Jørgensen <bj...@gmail.com>
wrote:

> Hi, I have built and running spark on k8s. A link to my repo
> https://github.com/bjornjorgensen/jlpyk8s
>
> Everything seems to be running fine, but I can’t save to PVC.
> If I convert the dataframe to pandas, then I can save it.
>
>
>
> from pyspark.sql import SparkSession
> spark = SparkSession.builder \
>     .master("k8s://https://kubernetes.default.svc.cluster.local:443") \
>     .config("spark.kubernetes.container.image",
> "bjornjorgensen/spark-py:v3.2-290821") \
>     .config("spark.kubernetes.authenticate.caCertFile", "/var/run/secrets/
> kubernetes.io/serviceaccount/ca.crt") \
>     .config("spark.kubernetes.authenticate.oauthTokenFile",
> "/var/run/secrets/kubernetes.io/serviceaccount/token") \
>     .config("spark.kubernetes.authenticate.driver.serviceAccountName",
> "my-pyspark-notebook") \
>     .config("spark.executor.instances", "10") \
>     .config("spark.driver.host",
> "my-pyspark-notebook-spark-driver.default.svc.cluster.local") \
>     .config("spark.driver.port", "29413") \
>
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.options.claimName",
> "nfs100") \
>
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.path",
> "/opt/spark/work-dir") \
>
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.options.claimName",
> "nfs100") \
>
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.path",
> "/opt/spark/work-dir") \
>
> .config("spark.kubernetes.driver.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> "False") \
>
> .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.nfs100.mount.readOnly",
> "False") \
>     .appName("myApp") \
>     .config("spark.sql.repl.eagerEval.enabled", "True") \
>     .config("spark.driver.memory", "4g") \
>     .config("spark.executor.memory", "4g") \
>     .getOrCreate()
> sc = spark.sparkContext
>
> pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
>
>
> 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
> operation! Moving all data to a single partition, this can cause serious
> performance degradation.
> 21/08/30 12:20:34 WARN WindowExec: No Partition Defined for Window
> operation! Moving all data to a single partition, this can cause serious
> performance degradation.
> 21/08/30 12:20:37 WARN WindowExec: No Partition Defined for Window
> operation! Moving all data to a single partition, this can cause serious
> performance degradation.
> 21/08/30 12:20:39 WARN TaskSetManager: Lost task 0.0 in stage 25.0 (TID
> 9497) (10.42.0.16 executor 3): java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220375889526593865835092_0025_m_000000_9497
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/30 12:20:40 WARN TaskSetManager: Lost task 0.1 in stage 25.0 (TID
> 9498) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220371965695886629589207_0025_m_000000_9498
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/30 12:20:42 WARN TaskSetManager: Lost task 0.2 in stage 25.0 (TID
> 9499) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220378533320694235394580_0025_m_000000_9499
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/30 12:20:43 WARN TaskSetManager: Lost task 0.3 in stage 25.0 (TID
> 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/30 12:20:43 ERROR TaskSetManager: Task 0 in stage 25.0 failed 4
> times; aborting job
> 21/08/30 12:20:43 ERROR FileFormatWriter: Aborting job
> d98cdc60-bb44-4189-b483-8449fc793658.
> org.apache.spark.SparkException: Job aborted due to stage failure: Task 0
> in stage 25.0 failed 4 times, most recent failure: Lost task 0.3 in stage
> 25.0 (TID 9500) (10.42.32.15 executor 10): java.io.IOException: Mkdirs
> failed to create
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> Driver stacktrace:
>         at
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>         at
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>         at
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>         at
> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>         at
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>         at scala.Option.foreach(Option.scala:407)
>         at
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>         at
> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>         at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>         at
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>         at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>         at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>         at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>         at
> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>         at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>         at
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>         at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>         at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>         at
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>         at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>         at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>         at
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>         at
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>         at
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>         at
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>         at
> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>         at
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> Method)
>         at
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>         at
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>         at
> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>         at py4j.Gateway.invoke(Gateway.java:282)
>         at
> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>         at
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>         at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>         at java.base/java.lang.Thread.run(Thread.java:829)
> Caused by: java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
> ---------------------------------------------------------------------------
> Py4JJavaError                             Traceback (most recent call last)
> /tmp/ipykernel_80/163396320.py in <module>
> ----> 1 pdf.to_parquet("/opt/spark/work-dir/falk/test/F01test.parquet")
>
> /opt/spark/python/pyspark/pandas/frame.py in to_parquet(self, path, mode,
> partition_cols, compression, index_col, **options)
>    4721         if compression is not None:
>    4722             builder.option("compression", compression)
> -> 4723         builder.options(**options).format("parquet").save(path)
>    4724
>    4725     def to_orc(
>
> /opt/spark/python/pyspark/sql/readwriter.py in save(self, path, format,
> mode, partitionBy, **options)
>     738             self._jwrite.save()
>     739         else:
> --> 740             self._jwrite.save(path)
>     741
>     742     @since(1.4)
>
> /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
> __call__(self, *args)
>    1307
>    1308         answer = self.gateway_client.send_command(command)
> -> 1309         return_value = get_return_value(
>    1310             answer, self.gateway_client, self.target_id, self.name
> )
>    1311
>
> /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
>     109     def deco(*a, **kw):
>     110         try:
> --> 111             return f(*a, **kw)
>     112         except py4j.protocol.Py4JJavaError as e:
>     113             converted = convert_exception(e.java_exception)
>
> /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
> get_return_value(answer, gateway_client, target_id, name)
>     324             value = OUTPUT_CONVERTER[type](answer[2:],
> gateway_client)
>     325             if answer[1] == REFERENCE_TYPE:
> --> 326                 raise Py4JJavaError(
>     327                     "An error occurred while calling {0}{1}{2}.\n".
>     328                     format(target_id, ".", name), value)
>
> Py4JJavaError: An error occurred while calling o4804.save.
> : org.apache.spark.SparkException: Job aborted.
>         at
> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
>         at
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>         at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>         at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>         at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>         at
> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>         at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>         at
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>         at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>         at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>         at
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>         at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>         at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>         at
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>         at
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>         at
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>         at
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>         at
> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>         at
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> Method)
>         at
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>         at
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>         at
> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>         at py4j.Gateway.invoke(Gateway.java:282)
>         at
> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>         at
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>         at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>         at java.base/java.lang.Thread.run(Thread.java:829)
> Caused by: org.apache.spark.SparkException: Job aborted due to stage
> failure: Task 0 in stage 25.0 failed 4 times, most recent failure: Lost
> task 0.3 in stage 25.0 (TID 9500) (10.42.32.15 executor 10):
> java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> Driver stacktrace:
>         at
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>         at
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>         at
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>         at
> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>         at
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>         at scala.Option.foreach(Option.scala:407)
>         at
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>         at
> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>         at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>         ... 41 more
> Caused by: java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/test/F01test.parquet/_temporary/0/_temporary/attempt_202108301220379200778754574276539_0025_m_000000_9500
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
>
>
>
> df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
> mode="overwrite")
>
>
> 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.0 in stage 26.0 (TID
> 9543) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630570334759957727637_0026_m_000002_9543
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.0 in stage 26.0 (TID
> 9542) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306992160257769852924_0026_m_000001_9542
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:30 WARN TaskSetManager: Lost task 4.0 in stage 26.0 (TID
> 9545) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305635902832664702349_0026_m_000004_9545
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:30 WARN TaskSetManager: Lost task 10.0 in stage 26.0 (TID
> 9551) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303695223706240035696_0026_m_000010_9551
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:30 WARN TaskSetManager: Lost task 2.1 in stage 26.0 (TID
> 9552) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303153023682655991980_0026_m_000002_9552
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:30 WARN TaskSetManager: Lost task 1.1 in stage 26.0 (TID
> 9553) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303560179987922074406_0026_m_000001_9553
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.2 in stage 26.0 (TID
> 9556) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906307237274992181823763_0026_m_000002_9556
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.1 in stage 26.0 (TID
> 9554) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306864123848918470508_0026_m_000004_9554
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.1 in stage 26.0 (TID
> 9555) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302601970797047480301_0026_m_000010_9555
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.0 in stage 26.0 (TID
> 9541) (10.42.192.9 executor 8): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304154332762277555982_0026_m_000000_9541
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.0 in stage 26.0 (TID
> 9548) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_20210829190630694656629969727231_0026_m_000007_9548
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 4.2 in stage 26.0 (TID
> 9559) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906303848774759656984701_0026_m_000004_9559
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 2.3 in stage 26.0 (TID
> 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 ERROR TaskSetManager: Task 2 in stage 26.0 failed 4
> times; aborting job
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 1.2 in stage 26.0 (TID
> 9557) (10.42.0.12 executor 3): java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302130961573080351978_0026_m_000001_9557
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 8.0 in stage 26.0 (TID
> 9549) (10.42.0.17 executor 6): java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906306776907150898092479_0026_m_000008_9549
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 5.0 in stage 26.0 (TID
> 9546) (10.42.96.9 executor 7): java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906301725332653584503335_0026_m_000005_9546
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 11.0 in stage 26.0 (TID
> 9561) (10.42.240.4 executor 4): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302182889441465469285_0026_m_000011_9561
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 0.1 in stage 26.0 (TID
> 9563) (10.42.240.3 executor 1): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304682784428756208427_0026_m_000000_9563
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 ERROR FileFormatWriter: Aborting job
> 115cf3ce-5a9b-4274-8752-b6ead281f104.
> org.apache.spark.SparkException: Job aborted due to stage failure: Task 2
> in stage 26.0 failed 4 times, most recent failure: Lost task 2.3 in stage
> 26.0 (TID 9558) (10.42.240.3 executor 1): java.io.IOException: Mkdirs
> failed to create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> Driver stacktrace:
>         at
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>         at
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>         at
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>         at
> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>         at
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>         at scala.Option.foreach(Option.scala:407)
>         at
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>         at
> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>         at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>         at
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>         at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>         at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>         at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>         at
> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>         at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>         at
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>         at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>         at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>         at
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>         at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>         at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>         at
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>         at
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>         at
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>         at
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>         at
> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>         at
> org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
>         at
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> Method)
>         at
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>         at
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>         at
> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>         at py4j.Gateway.invoke(Gateway.java:282)
>         at
> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>         at
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>         at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>         at java.base/java.lang.Thread.run(Thread.java:829)
> Caused by: java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 7.1 in stage 26.0 (TID
> 9562) (10.42.32.11 executor 2): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906304432517406660432032_0026_m_000007_9562
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 9.0 in stage 26.0 (TID
> 9550) (10.42.128.10 executor 9): java.io.IOException: Mkdirs failed to
> create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906305148471752983202631_0026_m_000009_9550
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 14.0 in stage 26.0 (TID
> 9566) (10.42.96.9 executor 7): TaskKilled (Stage cancelled)
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 6.0 in stage 26.0 (TID
> 9547) (10.42.32.12 executor 11): TaskKilled (Stage cancelled)
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 13.0 in stage 26.0 (TID
> 9565) (10.42.0.17 executor 6): TaskKilled (Stage cancelled)
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 12.0 in stage 26.0 (TID
> 9564) (10.42.0.12 executor 3): TaskKilled (Stage cancelled)
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 10.2 in stage 26.0 (TID
> 9560) (10.42.192.9 executor 8): TaskKilled (Stage cancelled)
> [Stage 26:>                                                       (0 + 1)
> / 132]
> ---------------------------------------------------------------------------
> Py4JJavaError                             Traceback (most recent call last)
> /tmp/ipykernel_80/610855484.py in <module>
> ----> 1 df.write.parquet("/opt/spark/work-dir/falk/F01test_df.parquet",
> mode="overwrite")
>
> /opt/spark/python/pyspark/sql/readwriter.py in parquet(self, path, mode,
> partitionBy, compression)
>     883             self.partitionBy(partitionBy)
>     884         self._set_opts(compression=compression)
> --> 885         self._jwrite.parquet(path)
>     886
>     887     def text(self, path, compression=None, lineSep=None):
>
> /opt/conda/lib/python3.9/site-packages/py4j/java_gateway.py in
> __call__(self, *args)
>    1307
>    1308         answer = self.gateway_client.send_command(command)
> -> 1309         return_value = get_return_value(
>    1310             answer, self.gateway_client, self.target_id, self.name
> )
>    1311
>
> /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
>     109     def deco(*a, **kw):
>     110         try:
> --> 111             return f(*a, **kw)
>     112         except py4j.protocol.Py4JJavaError as e:
>     113             converted = convert_exception(e.java_exception)
>
> /opt/conda/lib/python3.9/site-packages/py4j/protocol.py in
> get_return_value(answer, gateway_client, target_id, name)
>     324             value = OUTPUT_CONVERTER[type](answer[2:],
> gateway_client)
>     325             if answer[1] == REFERENCE_TYPE:
> --> 326                 raise Py4JJavaError(
>     327                     "An error occurred while calling {0}{1}{2}.\n".
>     328                     format(target_id, ".", name), value)
>
> Py4JJavaError: An error occurred while calling o15435.parquet.
> : org.apache.spark.SparkException: Job aborted.
>         at
> org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:496)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:251)
>         at
> org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
>         at
> org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>         at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>         at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>         at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>         at
> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>         at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>         at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>         at
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
>         at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org
> $apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>         at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>         at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>         at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
>         at
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
>         at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
>         at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
>         at
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
>         at
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
>         at
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
>         at
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
>         at
> org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
>         at
> org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:781)
>         at
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> Method)
>         at
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>         at
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>         at
> py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>         at py4j.Gateway.invoke(Gateway.java:282)
>         at
> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>         at
> py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>         at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>         at java.base/java.lang.Thread.run(Thread.java:829)
> Caused by: org.apache.spark.SparkException: Job aborted due to stage
> failure: Task 2 in stage 26.0 failed 4 times, most recent failure: Lost
> task 2.3 in stage 26.0 (TID 9558) (10.42.240.3 executor 1):
> java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
>
> Driver stacktrace:
>         at
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
>         at
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>         at
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>         at
> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>         at
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
>         at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
>         at scala.Option.foreach(Option.scala:407)
>         at
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
>         at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
>         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>         at
> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
>         at org.apache.spark.SparkContext.runJob(SparkContext.scala:2211)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:218)
>         ... 42 more
> Caused by: java.io.IOException: Mkdirs failed to create
> file:/opt/spark/work-dir/falk/F01test_df.parquet/_temporary/0/_temporary/attempt_202108291906302742392804569950791_0026_m_000002_9558
> (exists=false, cwd=file:/opt/spark/work-dir)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:515)
>         at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at
> org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
>         at
> org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:329)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:482)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
>         at
> org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
>         at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161)
>         at
> org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:146)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:290)
>         at
> org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$16(FileFormatWriter.scala:229)
>         at
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>         at org.apache.spark.scheduler.Task.run(Task.scala:131)
>         at
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>         at
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>         at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
>         at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
>         at java.base/java.lang.Thread.run(Unknown Source)
> 21/08/29 19:06:31 WARN TaskSetManager: Lost task 3.0 in stage 26.0 (TID
> 9544) (10.42.0.18 executor 12): TaskKilled (Stage cancelled)
>
>
>
>
>
>
>
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>
>