You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/12/21 07:16:31 UTC
[GitHub] [spark] zhengruifeng commented on pull request #39151: [WIP] Introduce a proto message to make plan deterministic
zhengruifeng commented on PR #39151:
URL: https://github.com/apache/spark/pull/39151#issuecomment-1360941255
quick tests:
1, cache (after making some private ones public)
```
scala> val df = spark.range(0, 100)
df: org.apache.spark.sql.Dataset[Long] = [id: bigint]
scala> df.cache()
res0: df.type = [id: bigint]
scala> val plan = df.logicalPlan
plan: org.apache.spark.sql.catalyst.plans.logical.LogicalPlan =
Range (0, 100, step=1, splits=Some(10))
scala> import org.apache.spark.sql.{Column, Dataset, SparkSession}
import org.apache.spark.sql.{Column, Dataset, SparkSession}
scala> val df2 = Dataset.ofRows(spark, plan)
df2: org.apache.spark.sql.DataFrame = [id: bigint]
scala> df.storageLevel
res1: org.apache.spark.storage.StorageLevel = StorageLevel(disk, memory, deserialized, 1 replicas)
scala> df2.storageLevel
res2: org.apache.spark.storage.StorageLevel = StorageLevel(disk, memory, deserialized, 1 replicas)
scala> df == df2
res3: Boolean = false
scala> df2.explain
== Physical Plan ==
InMemoryTableScan [id#0L]
+- InMemoryRelation [id#0L], StorageLevel(disk, memory, deserialized, 1 replicas)
+- *(1) Range (0, 100, step=1, splits=10)
scala> df.explain
== Physical Plan ==
InMemoryTableScan [id#0L]
+- InMemoryRelation [id#0L], StorageLevel(disk, memory, deserialized, 1 replicas)
+- *(1) Range (0, 100, step=1, splits=10)
```
2, relation
```
scala> val df = spark.range(0, 100)
df: org.apache.spark.sql.Dataset[Long] = [id: bigint]
scala> df.write.parquet("/tmp/1.pq")
scala> val df2 = spark.read.parquet("/tmp/1.pq")
df2: org.apache.spark.sql.DataFrame = [id: bigint]
scala> df2.queryExecution.logical.deterministic
res2: Boolean = true
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org