You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2020/04/05 04:15:21 UTC
[spark] branch branch-3.0 updated: [SPARK-30841][SQL][DOC][3.0] Add
version information to the configuration of SQL
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new af7fa9d [SPARK-30841][SQL][DOC][3.0] Add version information to the configuration of SQL
af7fa9d is described below
commit af7fa9d12622c3b2b169c4a58d143bc21ed9e784
Author: beliefer <be...@163.com>
AuthorDate: Sun Apr 5 13:12:56 2020 +0900
[SPARK-30841][SQL][DOC][3.0] Add version information to the configuration of SQL
### What changes were proposed in this pull request?
This PR follows https://github.com/apache/spark/pull/27691 and backports it to Spark 3.0 (branch-3.0).
Compared with master, the configurations differ in only one item:
Item name | Since version | JIRA ID | Commit ID | Note
-- | -- | -- | -- | --
spark.sql.pandas.udf.buffer.size | 3.0.0 | SPARK-27870 | 26998b86c13e79582a3df31f6184f825cde45e73#diff-9a6b543db706f1a90f790783d6930a13 | This config was renamed to spark.sql.execution.pandas.udf.buffer.size in master.
### Why are the changes needed?
To supplement the SQL configurations with version information.
### Does this PR introduce any user-facing change?
'No'.
### How was this patch tested?
Jenkins test.
Closes #28116 from beliefer/backport-sqlconf-version.
Authored-by: beliefer <be...@163.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
.../org/apache/spark/sql/internal/SQLConf.scala | 267 ++++++++++++++++++++-
1 file changed, 261 insertions(+), 6 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 9f9e556..7a10aba 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -177,6 +177,7 @@ object SQLConf {
val ANALYZER_MAX_ITERATIONS = buildConf("spark.sql.analyzer.maxIterations")
.internal()
.doc("The max number of iterations the analyzer runs.")
+ .version("3.0.0")
.intConf
.createWithDefault(100)
@@ -185,12 +186,14 @@ object SQLConf {
"specified by their rule names and separated by comma. It is not guaranteed that all the " +
"rules in this configuration will eventually be excluded, as some rules are necessary " +
"for correctness. The optimizer will log the rules that have indeed been excluded.")
+ .version("2.4.0")
.stringConf
.createOptional
val OPTIMIZER_MAX_ITERATIONS = buildConf("spark.sql.optimizer.maxIterations")
.internal()
.doc("The max number of iterations the optimizer runs.")
+ .version("2.0.0")
.intConf
.createWithDefault(100)
@@ -198,6 +201,7 @@ object SQLConf {
buildConf("spark.sql.optimizer.inSetConversionThreshold")
.internal()
.doc("The threshold of set size for InSet conversion.")
+ .version("2.0.0")
.intConf
.createWithDefault(10)
@@ -206,6 +210,7 @@ object SQLConf {
.internal()
.doc("Configures the max set size in InSet for which Spark will generate code with " +
"switch statements. This is applicable only to bytes, shorts, ints, dates.")
+ .version("3.0.0")
.intConf
.checkValue(threshold => threshold >= 0 && threshold <= 600, "The max set size " +
"for using switch statements in InSet must be non-negative and less than or equal to 600")
@@ -216,6 +221,7 @@ object SQLConf {
.doc("Configures the log level for logging the change from the original plan to the new " +
"plan after a rule or batch is applied. The value can be 'trace', 'debug', 'info', " +
"'warn', or 'error'. The default log level is 'trace'.")
+ .version("3.0.0")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValue(logLevel => Set("TRACE", "DEBUG", "INFO", "WARN", "ERROR").contains(logLevel),
@@ -227,6 +233,7 @@ object SQLConf {
.internal()
.doc("Configures a list of rules to be logged in the optimizer, in which the rules are " +
"specified by their rule names and separated by comma.")
+ .version("3.0.0")
.stringConf
.createOptional
@@ -234,12 +241,14 @@ object SQLConf {
.internal()
.doc("Configures a list of batches to be logged in the optimizer, in which the batches " +
"are specified by their batch names and separated by comma.")
+ .version("3.0.0")
.stringConf
.createOptional
val DYNAMIC_PARTITION_PRUNING_ENABLED =
buildConf("spark.sql.optimizer.dynamicPartitionPruning.enabled")
.doc("When true, we will generate predicate for partition column when it's used as join key")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -249,6 +258,7 @@ object SQLConf {
.doc("When true, distinct count statistics will be used for computing the data size of the " +
"partitioned table after dynamic partition pruning, in order to evaluate if it is worth " +
"adding an extra subquery as the pruning filter if broadcast reuse is not applicable.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -259,6 +269,7 @@ object SQLConf {
"used as the fallback filter ratio for computing the data size of the partitioned table " +
"after dynamic partition pruning, in order to evaluate if it is worth adding an extra " +
"subquery as the pruning filter if broadcast reuse is not applicable.")
+ .version("3.0.0")
.doubleConf
.createWithDefault(0.5)
@@ -267,18 +278,21 @@ object SQLConf {
.internal()
.doc("When true, dynamic partition pruning will only apply when the broadcast exchange of " +
"a broadcast hash join operation can be reused as the dynamic pruning filter.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
val COMPRESS_CACHED = buildConf("spark.sql.inMemoryColumnarStorage.compressed")
.doc("When set to true Spark SQL will automatically select a compression codec for each " +
"column based on statistics of the data.")
+ .version("1.0.1")
.booleanConf
.createWithDefault(true)
val COLUMN_BATCH_SIZE = buildConf("spark.sql.inMemoryColumnarStorage.batchSize")
.doc("Controls the size of batches for columnar caching. Larger batch sizes can improve " +
"memory utilization and compression, but risk OOMs when caching data.")
+ .version("1.1.1")
.intConf
.createWithDefault(10000)
@@ -286,6 +300,7 @@ object SQLConf {
buildConf("spark.sql.inMemoryColumnarStorage.partitionPruning")
.internal()
.doc("When true, enable partition pruning for in-memory columnar tables.")
+ .version("1.2.0")
.booleanConf
.createWithDefault(true)
@@ -293,12 +308,14 @@ object SQLConf {
buildConf("spark.sql.inMemoryTableScanStatistics.enable")
.internal()
.doc("When true, enable in-memory table scan accumulators.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
val CACHE_VECTORIZED_READER_ENABLED =
buildConf("spark.sql.inMemoryColumnarStorage.enableVectorizedReader")
.doc("Enables vectorized reader for columnar caching.")
+ .version("2.3.1")
.booleanConf
.createWithDefault(true)
@@ -306,12 +323,14 @@ object SQLConf {
buildConf("spark.sql.columnVector.offheap.enabled")
.internal()
.doc("When true, use OffHeapColumnVector in ColumnarBatch.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(false)
val PREFER_SORTMERGEJOIN = buildConf("spark.sql.join.preferSortMergeJoin")
.internal()
.doc("When true, prefer sort merge join over shuffle hash join.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
@@ -320,6 +339,7 @@ object SQLConf {
.doc("When true, enable use of radix sort when possible. Radix sort is much faster but " +
"requires additional memory to be reserved up-front. The memory overhead may be " +
"significant when sorting very small rows (up to 50% more in this case).")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
@@ -330,6 +350,7 @@ object SQLConf {
"command `ANALYZE TABLE <tableName> COMPUTE STATISTICS noscan` has been " +
"run, and file-based data source tables where the statistics are computed directly on " +
"the files of data.")
+ .version("1.1.0")
.bytesConf(ByteUnit.BYTE)
.createWithDefaultString("10MB")
@@ -338,6 +359,7 @@ object SQLConf {
.doc("Minimal increase rate in number of partitions between attempts when executing a take " +
"on a query. Higher values lead to more partitions read. Lower values might lead to " +
"longer execution times as more jobs will be run")
+ .version("2.1.1")
.intConf
.createWithDefault(4)
@@ -345,6 +367,7 @@ object SQLConf {
buildConf("spark.sql.hive.advancedPartitionPredicatePushdown.enabled")
.internal()
.doc("When true, advanced partition predicate pushdown into Hive metastore is enabled.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(true)
@@ -352,6 +375,7 @@ object SQLConf {
.doc("The default number of partitions to use when shuffling data for joins or aggregations. " +
"Note: For structured streaming, this configuration cannot be changed between query " +
"restarts from the same checkpoint location.")
+ .version("1.1.0")
.intConf
.checkValue(_ > 0, "The value of spark.sql.shuffle.partitions must be positive")
.createWithDefault(200)
@@ -360,12 +384,14 @@ object SQLConf {
buildConf("spark.sql.adaptive.shuffle.targetPostShuffleInputSize")
.internal()
.doc("(Deprecated since Spark 3.0)")
+ .version("1.6.0")
.bytesConf(ByteUnit.BYTE)
.createWithDefaultString("64MB")
val ADAPTIVE_EXECUTION_ENABLED = buildConf("spark.sql.adaptive.enabled")
.doc("When true, enable adaptive query execution, which re-optimizes the query plan in the " +
"middle of query execution, based on accurate runtime statistics.")
+ .version("1.6.0")
.booleanConf
.createWithDefault(false)
@@ -375,6 +401,7 @@ object SQLConf {
"sub-queries. By setting this config to true (together with " +
s"'${ADAPTIVE_EXECUTION_ENABLED.key}' set to true), Spark will force apply adaptive query " +
"execution for all supported queries.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -382,6 +409,7 @@ object SQLConf {
.internal()
.doc("Configures the log level for adaptive execution logging of plan changes. The value " +
"can be 'trace', 'debug', 'info', 'warn', or 'error'. The default log level is 'debug'.")
+ .version("3.0.0")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(Set("TRACE", "DEBUG", "INFO", "WARN", "ERROR"))
@@ -392,6 +420,7 @@ object SQLConf {
.doc("The advisory size in bytes of the shuffle partition during adaptive optimization " +
s"(when ${ADAPTIVE_EXECUTION_ENABLED.key} is true). It takes effect when Spark " +
"coalesces small shuffle partitions or splits skewed shuffle partition.")
+ .version("3.0.0")
.fallbackConf(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE)
val COALESCE_PARTITIONS_ENABLED =
@@ -399,6 +428,7 @@ object SQLConf {
.doc(s"When true and '${ADAPTIVE_EXECUTION_ENABLED.key}' is true, Spark will coalesce " +
"contiguous shuffle partitions according to the target size (specified by " +
s"'${ADVISORY_PARTITION_SIZE_IN_BYTES.key}'), to avoid too many small tasks.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -408,6 +438,7 @@ object SQLConf {
"value is the default parallelism of the Spark cluster. This configuration only " +
s"has an effect when '${ADAPTIVE_EXECUTION_ENABLED.key}' and " +
s"'${COALESCE_PARTITIONS_ENABLED.key}' are both true.")
+ .version("3.0.0")
.intConf
.checkValue(_ > 0, "The minimum number of partitions must be positive.")
.createOptional
@@ -418,6 +449,7 @@ object SQLConf {
s"${SHUFFLE_PARTITIONS.key}. This configuration only has an effect when " +
s"'${ADAPTIVE_EXECUTION_ENABLED.key}' and '${COALESCE_PARTITIONS_ENABLED.key}' " +
"are both true.")
+ .version("3.0.0")
.intConf
.checkValue(_ > 0, "The initial number of partitions must be positive.")
.createOptional
@@ -432,6 +464,7 @@ object SQLConf {
s"'${COALESCE_PARTITIONS_ENABLED.key}' are both true. This feature also depends " +
"on a relocatable serializer, the concatenation support codec in use and the new version " +
"shuffle fetch protocol.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -440,6 +473,7 @@ object SQLConf {
.doc(s"When true and '${ADAPTIVE_EXECUTION_ENABLED.key}' is true, Spark tries to use local " +
"shuffle reader to read the shuffle data when the shuffle partitioning is not needed, " +
"for example, after converting sort-merge join to broadcast-hash join.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -448,6 +482,7 @@ object SQLConf {
.doc(s"When true and '${ADAPTIVE_EXECUTION_ENABLED.key}' is true, Spark dynamically " +
"handles skew in sort-merge join by splitting (and replicating if needed) skewed " +
"partitions.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -456,6 +491,7 @@ object SQLConf {
.doc("A partition is considered as skewed if its size is larger than this factor " +
"multiplying the median partition size and also larger than " +
"'spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes'")
+ .version("3.0.0")
.intConf
.checkValue(_ > 0, "The skew factor must be positive.")
.createWithDefault(10)
@@ -466,6 +502,7 @@ object SQLConf {
s"threshold and also larger than '${SKEW_JOIN_SKEWED_PARTITION_FACTOR.key}' " +
"multiplying the median partition size. Ideally this config should be set larger " +
s"than '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}'.")
+ .version("3.0.0")
.bytesConf(ByteUnit.BYTE)
.createWithDefaultString("256MB")
@@ -476,6 +513,7 @@ object SQLConf {
"considered as the build side of a broadcast-hash join in adaptive execution regardless " +
"of its size.This configuration only has an effect when " +
s"'${ADAPTIVE_EXECUTION_ENABLED.key}' is true.")
+ .version("3.0.0")
.doubleConf
.checkValue(_ >= 0, "The non-empty partition ratio must be positive number.")
.createWithDefault(0.2)
@@ -484,6 +522,7 @@ object SQLConf {
buildConf("spark.sql.subexpressionElimination.enabled")
.internal()
.doc("When true, common subexpressions will be eliminated.")
+ .version("1.6.0")
.booleanConf
.createWithDefault(true)
@@ -491,6 +530,7 @@ object SQLConf {
.internal()
.doc("Whether the query analyzer should be case sensitive or not. " +
"Default to case insensitive. It is highly discouraged to turn on case sensitive mode.")
+ .version("1.4.0")
.booleanConf
.createWithDefault(false)
@@ -500,6 +540,7 @@ object SQLConf {
"plan to optimize them. Constraint propagation can sometimes be computationally expensive " +
"for certain kinds of query plans (such as those with a large number of predicates and " +
"aliases) which might negatively impact overall runtime.")
+ .version("2.2.0")
.booleanConf
.createWithDefault(true)
@@ -508,6 +549,7 @@ object SQLConf {
.doc("When true, string literals (including regex patterns) remain escaped in our SQL " +
"parser. The default is false since Spark 2.0. Setting it to true can restore the behavior " +
"prior to Spark 2.0.")
+ .version("2.2.1")
.booleanConf
.createWithDefault(false)
@@ -516,6 +558,7 @@ object SQLConf {
.doc("When estimating the output data size of a table scan, multiply the file size with this " +
"factor as the estimated data size, in case the data is compressed in the file and lead to" +
" a heavily underestimated result.")
+ .version("2.3.1")
.doubleConf
.checkValue(_ > 0, "the value of fileDataSizeFactor must be greater than 0")
.createWithDefault(1.0)
@@ -524,6 +567,7 @@ object SQLConf {
.doc("When true, the Parquet data source merges schemas collected from all data files, " +
"otherwise the schema is picked from the summary file or a random data file " +
"if no summary file is available.")
+ .version("1.5.0")
.booleanConf
.createWithDefault(false)
@@ -532,6 +576,7 @@ object SQLConf {
"summary files and we will ignore them when merging schema. Otherwise, if this is " +
"false, which is the default, we will merge all part-files. This should be considered " +
"as expert-only option, and shouldn't be enabled before knowing what it means exactly.")
+ .version("1.5.0")
.booleanConf
.createWithDefault(false)
@@ -540,6 +585,7 @@ object SQLConf {
"Spark SQL, do not differentiate between binary data and strings when writing out the " +
"Parquet schema. This flag tells Spark SQL to interpret binary data as a string to provide " +
"compatibility with these systems.")
+ .version("1.1.1")
.booleanConf
.createWithDefault(false)
@@ -548,6 +594,7 @@ object SQLConf {
"Spark would also store Timestamp as INT96 because we need to avoid precision lost of the " +
"nanoseconds field. This flag tells Spark SQL to interpret INT96 data as a timestamp to " +
"provide compatibility with these systems.")
+ .version("1.3.0")
.booleanConf
.createWithDefault(true)
@@ -555,6 +602,7 @@ object SQLConf {
.doc("This controls whether timestamp adjustments should be applied to INT96 data when " +
"converting to timestamps, for data written by Impala. This is necessary because Impala " +
"stores INT96 data with a different timezone offset than Hive & Spark.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(false)
@@ -568,6 +616,7 @@ object SQLConf {
"is a standard timestamp type in Parquet, which stores number of microseconds from the " +
"Unix epoch. TIMESTAMP_MILLIS is also standard, but with millisecond precision, which " +
"means Spark has to truncate the microsecond portion of its timestamp value.")
+ .version("2.3.0")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(ParquetOutputTimestampType.values.map(_.toString))
@@ -579,6 +628,7 @@ object SQLConf {
"precedence would be `compression`, `parquet.compression`, " +
"`spark.sql.parquet.compression.codec`. Acceptable values include: none, uncompressed, " +
"snappy, gzip, lzo, brotli, lz4, zstd.")
+ .version("1.1.1")
.stringConf
.transform(_.toLowerCase(Locale.ROOT))
.checkValues(Set("none", "uncompressed", "snappy", "gzip", "lzo", "lz4", "brotli", "zstd"))
@@ -586,6 +636,7 @@ object SQLConf {
val PARQUET_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.parquet.filterPushdown")
.doc("Enables Parquet filter push-down optimization when set to true.")
+ .version("1.2.0")
.booleanConf
.createWithDefault(true)
@@ -593,6 +644,7 @@ object SQLConf {
.doc("If true, enables Parquet filter push-down optimization for Date. " +
s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' is " +
"enabled.")
+ .version("2.4.0")
.internal()
.booleanConf
.createWithDefault(true)
@@ -602,15 +654,17 @@ object SQLConf {
.doc("If true, enables Parquet filter push-down optimization for Timestamp. " +
s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' is " +
"enabled and Timestamp stored as TIMESTAMP_MICROS or TIMESTAMP_MILLIS type.")
- .internal()
- .booleanConf
- .createWithDefault(true)
+ .version("2.4.0")
+ .internal()
+ .booleanConf
+ .createWithDefault(true)
val PARQUET_FILTER_PUSHDOWN_DECIMAL_ENABLED =
buildConf("spark.sql.parquet.filterPushdown.decimal")
.doc("If true, enables Parquet filter push-down optimization for Decimal. " +
s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' is " +
"enabled.")
+ .version("2.4.0")
.internal()
.booleanConf
.createWithDefault(true)
@@ -620,6 +674,7 @@ object SQLConf {
.doc("If true, enables Parquet filter push-down optimization for string startsWith function. " +
s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' is " +
"enabled.")
+ .version("2.4.0")
.internal()
.booleanConf
.createWithDefault(true)
@@ -632,6 +687,7 @@ object SQLConf {
"By setting this value to 0 this feature can be disabled. " +
s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' is " +
"enabled.")
+ .version("2.4.0")
.internal()
.intConf
.checkValue(threshold => threshold >= 0, "The threshold must not be negative.")
@@ -643,6 +699,7 @@ object SQLConf {
"systems such as Apache Hive and Apache Impala use. If false, the newer format in Parquet " +
"will be used. For example, decimals will be written in int-based format. If Parquet " +
"output is intended for use with systems that do not support this newer format, set to true.")
+ .version("1.6.0")
.booleanConf
.createWithDefault(false)
@@ -652,6 +709,7 @@ object SQLConf {
"of org.apache.parquet.hadoop.ParquetOutputCommitter. If it is not, then metadata " +
"summaries will never be created, irrespective of the value of " +
"parquet.summary.metadata.level")
+ .version("1.5.0")
.internal()
.stringConf
.createWithDefault("org.apache.parquet.hadoop.ParquetOutputCommitter")
@@ -659,6 +717,7 @@ object SQLConf {
val PARQUET_VECTORIZED_READER_ENABLED =
buildConf("spark.sql.parquet.enableVectorizedReader")
.doc("Enables vectorized parquet decoding.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
@@ -668,12 +727,14 @@ object SQLConf {
s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' " +
"is enabled and the vectorized reader is not used. You can ensure the vectorized reader " +
s"is not used by setting '${PARQUET_VECTORIZED_READER_ENABLED.key}' to false.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(false)
val PARQUET_VECTORIZED_READER_BATCH_SIZE = buildConf("spark.sql.parquet.columnarReaderBatchSize")
.doc("The number of rows to include in a parquet vectorized reader batch. The number should " +
"be carefully chosen to minimize overhead and avoid OOMs in reading data.")
+ .version("2.4.0")
.intConf
.createWithDefault(4096)
@@ -682,6 +743,7 @@ object SQLConf {
"`orc.compress` is specified in the table-specific options/properties, the precedence " +
"would be `compression`, `orc.compress`, `spark.sql.orc.compression.codec`." +
"Acceptable values include: none, uncompressed, snappy, zlib, lzo.")
+ .version("2.3.0")
.stringConf
.transform(_.toLowerCase(Locale.ROOT))
.checkValues(Set("none", "uncompressed", "snappy", "zlib", "lzo"))
@@ -690,6 +752,7 @@ object SQLConf {
val ORC_IMPLEMENTATION = buildConf("spark.sql.orc.impl")
.doc("When native, use the native version of ORC support instead of the ORC library in Hive. " +
"It is 'hive' by default prior to Spark 2.4.")
+ .version("2.3.0")
.internal()
.stringConf
.checkValues(Set("hive", "native"))
@@ -697,23 +760,27 @@ object SQLConf {
val ORC_VECTORIZED_READER_ENABLED = buildConf("spark.sql.orc.enableVectorizedReader")
.doc("Enables vectorized orc decoding.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(true)
val ORC_VECTORIZED_READER_BATCH_SIZE = buildConf("spark.sql.orc.columnarReaderBatchSize")
.doc("The number of rows to include in a orc vectorized reader batch. The number should " +
"be carefully chosen to minimize overhead and avoid OOMs in reading data.")
+ .version("2.4.0")
.intConf
.createWithDefault(4096)
val ORC_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.orc.filterPushdown")
.doc("When true, enable filter pushdown for ORC files.")
+ .version("1.4.0")
.booleanConf
.createWithDefault(true)
val ORC_SCHEMA_MERGING_ENABLED = buildConf("spark.sql.orc.mergeSchema")
.doc("When true, the Orc data source merges schemas collected from all data files, " +
"otherwise the schema is picked from a random data file.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -721,6 +788,7 @@ object SQLConf {
.doc("When true, check all the partition paths under the table\'s root directory " +
"when reading data stored in HDFS. This configuration will be deprecated in the future " +
s"releases and replaced by ${SPARK_IGNORE_MISSING_FILES.key}.")
+ .version("1.4.0")
.booleanConf
.createWithDefault(false)
@@ -730,6 +798,7 @@ object SQLConf {
"unmatching partitions can be eliminated earlier. This only affects Hive tables " +
"not converted to filesource relations (see HiveUtils.CONVERT_METASTORE_PARQUET and " +
"HiveUtils.CONVERT_METASTORE_ORC for more information).")
+ .version("1.5.0")
.booleanConf
.createWithDefault(true)
@@ -739,6 +808,7 @@ object SQLConf {
"This includes both datasource and converted Hive tables. When partition management " +
"is enabled, datasource tables store partition in the Hive metastore, and use the " +
"metastore to prune partitions during query planning.")
+ .version("2.1.1")
.booleanConf
.createWithDefault(true)
@@ -747,6 +817,7 @@ object SQLConf {
.doc("When nonzero, enable caching of partition file metadata in memory. All tables share " +
"a cache that can use up to specified num bytes for file metadata. This conf only " +
"has an effect when hive filesource partition management is enabled.")
+ .version("2.1.1")
.longConf
.createWithDefault(250 * 1024 * 1024)
@@ -762,6 +833,7 @@ object SQLConf {
"and write it back to the table properties), INFER_ONLY (infer the schema but don't " +
"attempt to write it to the table properties) and NEVER_INFER (the default mode-- fallback " +
"to using the case-insensitive metastore schema instead of inferring).")
+ .version("2.1.1")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(HiveCaseSensitiveInferenceMode.values.map(_.toString))
@@ -774,23 +846,27 @@ object SQLConf {
"scanned are partition columns and the query has an aggregate operator that satisfies " +
"distinct semantics. By default the optimization is disabled, since it may return " +
"incorrect results when the files are empty.")
+ .version("2.1.1")
.booleanConf
.createWithDefault(false)
val COLUMN_NAME_OF_CORRUPT_RECORD = buildConf("spark.sql.columnNameOfCorruptRecord")
.doc("The name of internal column for storing raw/un-parsed JSON and CSV records that fail " +
"to parse.")
+ .version("1.2.0")
.stringConf
.createWithDefault("_corrupt_record")
val BROADCAST_TIMEOUT = buildConf("spark.sql.broadcastTimeout")
.doc("Timeout in seconds for the broadcast wait time in broadcast joins.")
+ .version("1.3.0")
.timeConf(TimeUnit.SECONDS)
.createWithDefaultString(s"${5 * 60}")
// This is only used for the thriftserver
val THRIFTSERVER_POOL = buildConf("spark.sql.thriftserver.scheduler.pool")
.doc("Set a Fair Scheduler pool for a JDBC client session.")
+ .version("1.1.1")
.stringConf
.createOptional
@@ -798,23 +874,27 @@ object SQLConf {
buildConf("spark.sql.thriftServer.incrementalCollect")
.internal()
.doc("When true, enable incremental collection for execution in Thrift Server.")
+ .version("2.0.3")
.booleanConf
.createWithDefault(false)
val THRIFTSERVER_UI_STATEMENT_LIMIT =
buildConf("spark.sql.thriftserver.ui.retainedStatements")
.doc("The number of SQL statements kept in the JDBC/ODBC web UI history.")
+ .version("1.4.0")
.intConf
.createWithDefault(200)
val THRIFTSERVER_UI_SESSION_LIMIT = buildConf("spark.sql.thriftserver.ui.retainedSessions")
.doc("The number of SQL client sessions kept in the JDBC/ODBC web UI history.")
+ .version("1.4.0")
.intConf
.createWithDefault(200)
// This is used to set the default data source
val DEFAULT_DATA_SOURCE_NAME = buildConf("spark.sql.sources.default")
.doc("The default data source to use in input/output.")
+ .version("1.3.0")
.stringConf
.createWithDefault("parquet")
@@ -823,6 +903,7 @@ object SQLConf {
.doc("When true, a table created by a Hive CTAS statement (no USING clause) " +
"without specifying any storage property will be converted to a data source table, " +
s"using the data source set by ${DEFAULT_DATA_SOURCE_NAME.key}.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(false)
@@ -831,22 +912,26 @@ object SQLConf {
.doc("When true, fast stats (number of files and total size of all files) will be gathered" +
" in parallel while repairing table partitions to avoid the sequential listing in Hive" +
" metastore.")
+ .version("2.0.1")
.booleanConf
.createWithDefault(true)
val PARTITION_COLUMN_TYPE_INFERENCE =
buildConf("spark.sql.sources.partitionColumnTypeInference.enabled")
.doc("When true, automatically infer the data types for partitioned columns.")
+ .version("1.5.0")
.booleanConf
.createWithDefault(true)
val BUCKETING_ENABLED = buildConf("spark.sql.sources.bucketing.enabled")
.doc("When false, we will treat bucketed table as normal table")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
val BUCKETING_MAX_BUCKETS = buildConf("spark.sql.sources.bucketing.maxBuckets")
.doc("The maximum number of buckets allowed.")
+ .version("2.4.0")
.intConf
.checkValue(_ > 0, "the value of spark.sql.sources.bucketing.maxBuckets must be greater than 0")
.createWithDefault(100000)
@@ -855,36 +940,42 @@ object SQLConf {
.internal()
.doc("When false, we will throw an error if a query contains a cartesian product without " +
"explicit CROSS JOIN syntax.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
val ORDER_BY_ORDINAL = buildConf("spark.sql.orderByOrdinal")
.doc("When true, the ordinal numbers are treated as the position in the select list. " +
"When false, the ordinal numbers in order/sort by clause are ignored.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
val GROUP_BY_ORDINAL = buildConf("spark.sql.groupByOrdinal")
.doc("When true, the ordinal numbers in group by clauses are treated as the position " +
"in the select list. When false, the ordinal numbers are ignored.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
val GROUP_BY_ALIASES = buildConf("spark.sql.groupByAliases")
.doc("When true, aliases in a select list can be used in group by clauses. When false, " +
"an analysis exception is thrown in the case.")
+ .version("2.2.0")
.booleanConf
.createWithDefault(true)
// The output committer class used by data sources. The specified class needs to be a
// subclass of org.apache.hadoop.mapreduce.OutputCommitter.
val OUTPUT_COMMITTER_CLASS = buildConf("spark.sql.sources.outputCommitterClass")
+ .version("1.4.0")
.internal()
.stringConf
.createOptional
val FILE_COMMIT_PROTOCOL_CLASS =
buildConf("spark.sql.sources.commitProtocolClass")
+ .version("2.1.1")
.internal()
.stringConf
.createWithDefault(
@@ -896,6 +987,7 @@ object SQLConf {
"of detected paths exceeds this value during partition discovery, it tries to list the " +
"files with another Spark distributed job. This configuration is effective only when " +
"using file-based sources such as Parquet, JSON and ORC.")
+ .version("1.5.0")
.intConf
.checkValue(parallel => parallel >= 0, "The maximum number of paths allowed for listing " +
"files at driver side must not be negative")
@@ -905,6 +997,7 @@ object SQLConf {
buildConf("spark.sql.sources.parallelPartitionDiscovery.parallelism")
.doc("The number of parallelism to list a collection of path recursively, Set the " +
"number to prevent file listing from generating too many tasks.")
+ .version("2.1.1")
.internal()
.intConf
.createWithDefault(10000)
@@ -916,6 +1009,7 @@ object SQLConf {
"schedule tasks to take advantage of data locality. It can be particularly " +
"useful if data is read from a remote cluster so the scheduler could never " +
"take advantage of locality anyway.")
+ .version("3.0.0")
.internal()
.booleanConf
.createWithDefault(false)
@@ -924,6 +1018,7 @@ object SQLConf {
// See SPARK-6231.
val DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY =
buildConf("spark.sql.selfJoinAutoResolveAmbiguity")
+ .version("1.4.0")
.internal()
.booleanConf
.createWithDefault(true)
@@ -931,12 +1026,14 @@ object SQLConf {
val FAIL_AMBIGUOUS_SELF_JOIN_ENABLED =
buildConf("spark.sql.analyzer.failAmbiguousSelfJoin")
.doc("When true, fail the Dataset query if it contains ambiguous self-join.")
+ .version("3.0.0")
.internal()
.booleanConf
.createWithDefault(true)
// Whether to retain group by columns or not in GroupedData.agg.
val DATAFRAME_RETAIN_GROUP_COLUMNS = buildConf("spark.sql.retainGroupColumns")
+ .version("1.4.0")
.internal()
.booleanConf
.createWithDefault(true)
@@ -944,12 +1041,14 @@ object SQLConf {
val DATAFRAME_PIVOT_MAX_VALUES = buildConf("spark.sql.pivotMaxValues")
.doc("When doing a pivot without specifying values for the pivot column this is the maximum " +
"number of (distinct) values that will be collected without error.")
+ .version("1.6.0")
.intConf
.createWithDefault(10000)
val RUN_SQL_ON_FILES = buildConf("spark.sql.runSQLOnFiles")
.internal()
.doc("When true, we could use `datasource`.`path` as table in SQL query.")
+ .version("1.6.0")
.booleanConf
.createWithDefault(true)
@@ -957,6 +1056,7 @@ object SQLConf {
.internal()
.doc("When true, the whole stage (of multiple operators) will be compiled into single java" +
" method.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
@@ -965,6 +1065,7 @@ object SQLConf {
.internal()
.doc("When true, embed the (whole-stage) codegen stage ID into " +
"the class name of the generated class as a suffix")
+ .version("2.3.1")
.booleanConf
.createWithDefault(true)
@@ -972,6 +1073,7 @@ object SQLConf {
.internal()
.doc("The maximum number of fields (including nested fields) that will be supported before" +
" deactivating whole-stage codegen.")
+ .version("2.0.0")
.intConf
.createWithDefault(100)
@@ -981,6 +1083,7 @@ object SQLConf {
"interpreted if any compile error happens. Disabling fallback if `CODEGEN_ONLY`. " +
"`NO_CODEGEN` skips codegen and goes interpreted path always. Note that " +
"this config works only for tests.")
+ .version("2.4.0")
.internal()
.stringConf
.checkValues(CodegenObjectFactoryMode.values.map(_.toString))
@@ -990,12 +1093,14 @@ object SQLConf {
.internal()
.doc("When true, (whole stage) codegen could be temporary disabled for the part of query that" +
" fail to compile generated code")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines")
.internal()
.doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.")
+ .version("2.3.0")
.intConf
.checkValue(maxLines => maxLines >= -1, "The maximum must be a positive integer, 0 to " +
"disable logging or -1 to apply no limit.")
@@ -1009,6 +1114,7 @@ object SQLConf {
"is the largest bytecode size possible for a valid Java method. When running on HotSpot, " +
s"it may be preferable to set the value to ${CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT} " +
"to match HotSpot's implementation.")
+ .version("2.3.0")
.intConf
.createWithDefault(65535)
@@ -1020,6 +1126,7 @@ object SQLConf {
"generated, so use the code length as metric. When running on HotSpot, a function's " +
"bytecode should not go beyond 8KB, otherwise it will not be JITted; it also should not " +
"be too small, otherwise there will be many function calls.")
+ .version("3.0.0")
.intConf
.checkValue(threshold => threshold > 0, "The threshold must be a positive integer.")
.createWithDefault(1024)
@@ -1030,6 +1137,7 @@ object SQLConf {
.doc("When true, whole stage codegen would put the logic of consuming rows of each " +
"physical operator into individual methods, instead of a single big method. This can be " +
"used to avoid oversized function that can miss the opportunity of JIT optimization.")
+ .version("2.3.1")
.booleanConf
.createWithDefault(true)
@@ -1037,6 +1145,7 @@ object SQLConf {
.doc("The maximum number of bytes to pack into a single partition when reading files. " +
"This configuration is effective only when using file-based sources such as Parquet, JSON " +
"and ORC.")
+ .version("2.0.0")
.bytesConf(ByteUnit.BYTE)
.createWithDefaultString("128MB") // parquet.block.size
@@ -1047,6 +1156,7 @@ object SQLConf {
" over estimated, then the partitions with small files will be faster than partitions with" +
" bigger files (which is scheduled first). This configuration is effective only when using" +
" file-based sources such as Parquet, JSON and ORC.")
+ .version("2.0.0")
.longConf
.createWithDefault(4 * 1024 * 1024)
@@ -1055,6 +1165,7 @@ object SQLConf {
"encountering corrupted files and the contents that have been read will still be returned. " +
"This configuration is effective only when using file-based sources such as Parquet, JSON " +
"and ORC.")
+ .version("2.1.1")
.booleanConf
.createWithDefault(false)
@@ -1063,24 +1174,28 @@ object SQLConf {
"encountering missing files and the contents that have been read will still be returned. " +
"This configuration is effective only when using file-based sources such as Parquet, JSON " +
"and ORC.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(false)
val MAX_RECORDS_PER_FILE = buildConf("spark.sql.files.maxRecordsPerFile")
.doc("Maximum number of records to write out to a single file. " +
"If this value is zero or negative, there is no limit.")
+ .version("2.2.0")
.longConf
.createWithDefault(0)
val EXCHANGE_REUSE_ENABLED = buildConf("spark.sql.exchange.reuse")
.internal()
.doc("When true, the planner will try to find out duplicated exchanges and re-use them.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
val SUBQUERY_REUSE_ENABLED = buildConf("spark.sql.execution.reuseSubquery")
.internal()
.doc("When true, the planner will try to find out duplicated subqueries and re-use them.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -1092,6 +1207,7 @@ object SQLConf {
"be a subclass of StateStoreProvider, and must have a zero-arg constructor. " +
"Note: For structured streaming, this configuration cannot be changed between query " +
"restarts from the same checkpoint location.")
+ .version("2.3.0")
.stringConf
.createWithDefault(
"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider")
@@ -1101,6 +1217,7 @@ object SQLConf {
.internal()
.doc("Minimum number of state store delta files that needs to be generated before they " +
"consolidated into snapshots.")
+ .version("2.0.0")
.intConf
.createWithDefault(10)
@@ -1108,24 +1225,28 @@ object SQLConf {
buildConf("spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion")
.internal()
.doc("State format version used by flatMapGroupsWithState operation in a streaming query")
+ .version("2.4.0")
.intConf
.checkValue(v => Set(1, 2).contains(v), "Valid versions are 1 and 2")
.createWithDefault(2)
val CHECKPOINT_LOCATION = buildConf("spark.sql.streaming.checkpointLocation")
.doc("The default location for storing checkpoint data for streaming queries.")
+ .version("2.0.0")
.stringConf
.createOptional
val FORCE_DELETE_TEMP_CHECKPOINT_LOCATION =
buildConf("spark.sql.streaming.forceDeleteTempCheckpointLocation")
.doc("When true, enable temporary checkpoint locations force delete.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
val MIN_BATCHES_TO_RETAIN = buildConf("spark.sql.streaming.minBatchesToRetain")
.internal()
.doc("The minimum number of batches that must be retained and made recoverable.")
+ .version("2.1.1")
.intConf
.createWithDefault(100)
@@ -1135,6 +1256,7 @@ object SQLConf {
"loading from files. The value adjusts a trade-off between memory usage vs cache miss: " +
"'2' covers both success and direct failure cases, '1' covers only success case, " +
"and '0' covers extreme case - disable cache to maximize memory size of executors.")
+ .version("2.4.0")
.intConf
.createWithDefault(2)
@@ -1144,6 +1266,7 @@ object SQLConf {
.doc("State format version used by streaming aggregation operations in a streaming query. " +
"State between versions are tend to be incompatible, so state format version shouldn't " +
"be modified after running.")
+ .version("2.4.0")
.intConf
.checkValue(v => Set(1, 2).contains(v), "Valid versions are 1 and 2")
.createWithDefault(2)
@@ -1154,6 +1277,7 @@ object SQLConf {
"If we find a concurrent active run for a streaming query (in the same or different " +
"SparkSessions on the same cluster) and this flag is true, we will stop the old streaming " +
"query run to start the new one.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -1163,6 +1287,7 @@ object SQLConf {
.doc("State format version used by streaming join operations in a streaming query. " +
"State between versions are tend to be incompatible, so state format version shouldn't " +
"be modified after running.")
+ .version("3.0.0")
.intConf
.checkValue(v => Set(1, 2).contains(v), "Valid versions are 1 and 2")
.createWithDefault(2)
@@ -1172,6 +1297,7 @@ object SQLConf {
.internal()
.doc("When true, the logical plan for streaming query will be checked for unsupported" +
" operations.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
@@ -1179,6 +1305,7 @@ object SQLConf {
buildConf("spark.sql.variable.substitute")
.doc("This enables substitution using syntax like `${var}`, `${system:var}`, " +
"and `${env:var}`.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
@@ -1189,6 +1316,7 @@ object SQLConf {
"inserted/looked-up at a 1st-level, small, fast map, and then fallback to a " +
"2nd-level, larger, slower map when 1st level is full or keys cannot be found. " +
"When disabled, records go directly to the 2nd level.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(true)
@@ -1196,6 +1324,7 @@ object SQLConf {
buildConf("spark.sql.codegen.aggregate.map.vectorized.enable")
.internal()
.doc("Enable vectorized aggregate hash map. This is for testing/benchmarking only.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -1205,6 +1334,7 @@ object SQLConf {
.doc("When true, the code generator would split aggregate code into individual methods " +
"instead of a single big method. This can be used to avoid oversized function that " +
"can miss the opportunity of JIT optimization.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -1216,6 +1346,7 @@ object SQLConf {
"However the DAG depth may become too large and cause unexpected behavior. This " +
"configuration puts a limit on this: when the depth of a view exceeds this value during " +
"analysis, we terminate the resolution to avoid potential errors.")
+ .version("2.2.0")
.intConf
.checkValue(depth => depth > 0, "The maximum depth of a view reference in a nested view " +
"must be positive.")
@@ -1223,6 +1354,7 @@ object SQLConf {
val STREAMING_FILE_COMMIT_PROTOCOL_CLASS =
buildConf("spark.sql.streaming.commitProtocolClass")
+ .version("2.1.0")
.internal()
.stringConf
.createWithDefault("org.apache.spark.sql.execution.streaming.ManifestFileCommitProtocol")
@@ -1235,6 +1367,7 @@ object SQLConf {
"'max' which chooses the maximum across multiple operators. " +
"Note: This configuration cannot be changed between query restarts from the same " +
"checkpoint location.")
+ .version("2.4.0")
.stringConf
.transform(_.toLowerCase(Locale.ROOT))
.checkValue(
@@ -1249,6 +1382,7 @@ object SQLConf {
.doc("In the case of ObjectHashAggregateExec, when the size of the in-memory hash map " +
"grows too large, we will fall back to sort-based aggregation. This option sets a row " +
"count threshold for the size of the hash map.")
+ .version("2.2.0")
.intConf
// We are trying to be conservative and use a relatively small default count threshold here
// since the state object of some TypedImperativeAggregate function can be quite large (e.g.
@@ -1258,6 +1392,7 @@ object SQLConf {
val USE_OBJECT_HASH_AGG = buildConf("spark.sql.execution.useObjectHashAggregateExec")
.internal()
.doc("Decides if we use ObjectHashAggregateExec")
+ .version("2.2.0")
.booleanConf
.createWithDefault(true)
@@ -1266,12 +1401,14 @@ object SQLConf {
.doc("Whether to ignore null fields when generating JSON objects in JSON data source and " +
"JSON functions such as to_json. " +
"If false, it generates null for null fields in JSON objects.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
val FILE_SINK_LOG_DELETION = buildConf("spark.sql.streaming.fileSink.log.deletion")
.internal()
.doc("Whether to delete the expired log files in file stream sink.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(true)
@@ -1280,6 +1417,7 @@ object SQLConf {
.internal()
.doc("Number of log files after which all the previous files " +
"are compacted into the next log file.")
+ .version("2.0.0")
.intConf
.createWithDefault(10)
@@ -1287,12 +1425,14 @@ object SQLConf {
buildConf("spark.sql.streaming.fileSink.log.cleanupDelay")
.internal()
.doc("How long that a file is guaranteed to be visible for all readers.")
+ .version("2.0.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefault(TimeUnit.MINUTES.toMillis(10)) // 10 minutes
val FILE_SOURCE_LOG_DELETION = buildConf("spark.sql.streaming.fileSource.log.deletion")
.internal()
.doc("Whether to delete the expired log files in file stream source.")
+ .version("2.0.1")
.booleanConf
.createWithDefault(true)
@@ -1301,6 +1441,7 @@ object SQLConf {
.internal()
.doc("Number of log files after which all the previous files " +
"are compacted into the next log file.")
+ .version("2.0.1")
.intConf
.createWithDefault(10)
@@ -1308,6 +1449,7 @@ object SQLConf {
buildConf("spark.sql.streaming.fileSource.log.cleanupDelay")
.internal()
.doc("How long in milliseconds a file is guaranteed to be visible for all readers.")
+ .version("2.0.1")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefault(TimeUnit.MINUTES.toMillis(10)) // 10 minutes
@@ -1317,12 +1459,14 @@ object SQLConf {
.doc("When true, force the schema of streaming file source to be nullable (including all " +
"the fields). Otherwise, the schema might not be compatible with actual data, which " +
"leads to corruptions.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
val FILE_SOURCE_CLEANER_NUM_THREADS =
buildConf("spark.sql.streaming.fileSource.cleaner.numThreads")
.doc("Number of threads used in the file source completed file cleaner.")
+ .version("3.0.0")
.intConf
.createWithDefault(1)
@@ -1330,6 +1474,7 @@ object SQLConf {
buildConf("spark.sql.streaming.schemaInference")
.internal()
.doc("Whether file-based streaming sources will infer its own schema")
+ .version("2.0.0")
.booleanConf
.createWithDefault(false)
@@ -1337,6 +1482,7 @@ object SQLConf {
buildConf("spark.sql.streaming.pollingDelay")
.internal()
.doc("How long to delay polling new data when no data is available")
+ .version("2.0.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefault(10L)
@@ -1344,6 +1490,7 @@ object SQLConf {
buildConf("spark.sql.streaming.stopTimeout")
.doc("How long to wait in milliseconds for the streaming execution thread to stop when " +
"calling the streaming query's stop() method. 0 or negative values wait indefinitely.")
+ .version("3.0.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("0")
@@ -1351,6 +1498,7 @@ object SQLConf {
buildConf("spark.sql.streaming.noDataProgressEventInterval")
.internal()
.doc("How long to wait between two progress events when there is no data")
+ .version("2.1.1")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefault(10000L)
@@ -1359,18 +1507,21 @@ object SQLConf {
.doc(
"Whether streaming micro-batch engine will execute batches without data " +
"for eager state management for stateful streaming queries.")
+ .version("2.4.1")
.booleanConf
.createWithDefault(true)
val STREAMING_METRICS_ENABLED =
buildConf("spark.sql.streaming.metricsEnabled")
.doc("Whether Dropwizard/Codahale metrics will be reported for active streaming queries.")
+ .version("2.0.2")
.booleanConf
.createWithDefault(false)
val STREAMING_PROGRESS_RETENTION =
buildConf("spark.sql.streaming.numRecentProgressUpdates")
.doc("The number of progress updates to retain for a streaming query")
+ .version("2.1.1")
.intConf
.createWithDefault(100)
@@ -1378,6 +1529,7 @@ object SQLConf {
buildConf("spark.sql.streaming.checkpointFileManagerClass")
.doc("The class used to write checkpoint files atomically. This class must be a subclass " +
"of the interface CheckpointFileManager.")
+ .version("2.4.0")
.internal()
.stringConf
@@ -1385,6 +1537,7 @@ object SQLConf {
buildConf("spark.sql.streaming.checkpoint.escapedPathCheck.enabled")
.doc("Whether to detect a streaming query may pick up an incorrect checkpoint path due " +
"to SPARK-26824.")
+ .version("3.0.0")
.internal()
.booleanConf
.createWithDefault(true)
@@ -1395,6 +1548,7 @@ object SQLConf {
.doc("When true, SQL commands use parallel file listing, " +
"as opposed to single thread listing. " +
"This usually speeds up commands that need to list many directories.")
+ .version("2.4.1")
.booleanConf
.createWithDefault(true)
@@ -1404,6 +1558,7 @@ object SQLConf {
s"which is larger than `${AUTO_BROADCASTJOIN_THRESHOLD.key}` to be more conservative. " +
"That is to say by default the optimizer will not choose to broadcast a table unless it " +
"knows for sure its size is small enough.")
+ .version("1.1.0")
.bytesConf(ByteUnit.BYTE)
.createWithDefault(Long.MaxValue)
@@ -1414,6 +1569,7 @@ object SQLConf {
"For non-partitioned data source tables, it will be automatically recalculated if table " +
"statistics are not available. For partitioned data source and partitioned Hive tables, " +
s"It is '${DEFAULT_SIZE_IN_BYTES.key}' if table statistics are not available.")
+ .version("2.0.0")
.booleanConf
.createWithDefault(false)
@@ -1422,6 +1578,7 @@ object SQLConf {
.internal()
.doc("The maximum estimation error allowed in HyperLogLog++ algorithm when generating " +
"column level statistics.")
+ .version("2.1.1")
.doubleConf
.createWithDefault(0.05)
@@ -1432,6 +1589,7 @@ object SQLConf {
"histogram. Note that collecting histograms takes extra cost. For example, collecting " +
"column statistics usually takes only one table scan, but generating equi-height " +
"histogram will cause an extra table scan.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(false)
@@ -1439,6 +1597,7 @@ object SQLConf {
buildConf("spark.sql.statistics.histogram.numBins")
.internal()
.doc("The number of bins when generating histograms.")
+ .version("2.3.0")
.intConf
.checkValue(num => num > 1, "The number of bins must be greater than 1.")
.createWithDefault(254)
@@ -1449,6 +1608,7 @@ object SQLConf {
.doc("Accuracy of percentile approximation when generating equi-height histograms. " +
"Larger value means better accuracy. The relative error can be deduced by " +
"1.0 / PERCENTILE_ACCURACY.")
+ .version("2.3.0")
.intConf
.createWithDefault(10000)
@@ -1457,30 +1617,35 @@ object SQLConf {
.doc("Enables automatic update for table size once table's data is changed. Note that if " +
"the total number of files of the table is very large, this can be expensive and slow " +
"down data change commands.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(false)
val CBO_ENABLED =
buildConf("spark.sql.cbo.enabled")
.doc("Enables CBO for estimation of plan statistics when set true.")
+ .version("2.2.0")
.booleanConf
.createWithDefault(false)
val PLAN_STATS_ENABLED =
buildConf("spark.sql.cbo.planStats.enabled")
.doc("When true, the logical plan will fetch row counts and column statistics from catalog.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
val JOIN_REORDER_ENABLED =
buildConf("spark.sql.cbo.joinReorder.enabled")
.doc("Enables join reorder in CBO.")
+ .version("2.2.0")
.booleanConf
.createWithDefault(false)
val JOIN_REORDER_DP_THRESHOLD =
buildConf("spark.sql.cbo.joinReorder.dp.threshold")
.doc("The maximum number of joined nodes allowed in the dynamic programming algorithm.")
+ .version("2.2.0")
.intConf
.checkValue(number => number > 0, "The maximum number must be a positive integer.")
.createWithDefault(12)
@@ -1490,6 +1655,7 @@ object SQLConf {
.internal()
.doc("The weight of cardinality (number of rows) for plan cost comparison in join reorder: " +
"rows * weight + size * (1 - weight).")
+ .version("2.2.0")
.doubleConf
.checkValue(weight => weight >= 0 && weight <= 1, "The weight value must be in [0, 1].")
.createWithDefault(0.7)
@@ -1497,11 +1663,13 @@ object SQLConf {
val JOIN_REORDER_DP_STAR_FILTER =
buildConf("spark.sql.cbo.joinReorder.dp.star.filter")
.doc("Applies star-join filter heuristics to cost based join enumeration.")
+ .version("2.2.0")
.booleanConf
.createWithDefault(false)
val STARSCHEMA_DETECTION = buildConf("spark.sql.cbo.starSchemaDetection")
.doc("When true, it enables join reordering based on star schema detection. ")
+ .version("2.2.0")
.booleanConf
.createWithDefault(false)
@@ -1509,6 +1677,7 @@ object SQLConf {
.internal()
.doc("Specifies the upper limit of the ratio between the largest fact tables" +
" for a star join to be considered. ")
+ .version("2.2.0")
.doubleConf
.createWithDefault(0.9)
@@ -1522,6 +1691,7 @@ object SQLConf {
"Zone offsets must be in the format '(+|-)HH:mm', for example '-08:00' or '+01:00'. " +
"Also 'UTC' and 'Z' are supported as aliases of '+00:00'. Other short names are not " +
"recommended to use because they can be ambiguous.")
+ .version("2.2.0")
.stringConf
.checkValue(isValidTimezone, s"Cannot resolve the given timezone with" +
" ZoneId.of(_, ZoneId.SHORT_IDS)")
@@ -1531,6 +1701,7 @@ object SQLConf {
buildConf("spark.sql.windowExec.buffer.in.memory.threshold")
.internal()
.doc("Threshold for number of rows guaranteed to be held in memory by the window operator")
+ .version("2.2.1")
.intConf
.createWithDefault(4096)
@@ -1538,6 +1709,7 @@ object SQLConf {
buildConf("spark.sql.windowExec.buffer.spill.threshold")
.internal()
.doc("Threshold for number of rows to be spilled by window operator")
+ .version("2.2.0")
.intConf
.createWithDefault(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD.defaultValue.get)
@@ -1546,6 +1718,7 @@ object SQLConf {
.internal()
.doc("Threshold for number of rows guaranteed to be held in memory by the sort merge " +
"join operator")
+ .version("2.2.1")
.intConf
.createWithDefault(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH)
@@ -1553,6 +1726,7 @@ object SQLConf {
buildConf("spark.sql.sortMergeJoinExec.buffer.spill.threshold")
.internal()
.doc("Threshold for number of rows to be spilled by sort merge join operator")
+ .version("2.2.0")
.intConf
.createWithDefault(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD.defaultValue.get)
@@ -1561,6 +1735,7 @@ object SQLConf {
.internal()
.doc("Threshold for number of rows guaranteed to be held in memory by the cartesian " +
"product operator")
+ .version("2.2.1")
.intConf
.createWithDefault(4096)
@@ -1568,12 +1743,14 @@ object SQLConf {
buildConf("spark.sql.cartesianProductExec.buffer.spill.threshold")
.internal()
.doc("Threshold for number of rows to be spilled by cartesian product operator")
+ .version("2.2.0")
.intConf
.createWithDefault(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD.defaultValue.get)
val SUPPORT_QUOTED_REGEX_COLUMN_NAME = buildConf("spark.sql.parser.quotedRegexColumnNames")
.doc("When true, quoted Identifiers (using backticks) in SELECT statement are interpreted" +
" as regular expressions.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(false)
@@ -1582,12 +1759,14 @@ object SQLConf {
.internal()
.doc("Number of points to sample per partition in order to determine the range boundaries" +
" for range partitioning, typically used in global sorting (without limit).")
+ .version("2.3.0")
.intConf
.createWithDefault(100)
val ARROW_EXECUTION_ENABLED =
buildConf("spark.sql.execution.arrow.enabled")
.doc("(Deprecated since Spark 3.0, please set 'spark.sql.execution.arrow.pyspark.enabled'.)")
+ .version("2.3.0")
.booleanConf
.createWithDefault(false)
@@ -1599,6 +1778,7 @@ object SQLConf {
"2. pyspark.sql.SparkSession.createDataFrame when its input is a Pandas DataFrame " +
"The following data types are unsupported: " +
"BinaryType, MapType, ArrayType of TimestampType, and nested StructType.")
+ .version("3.0.0")
.fallbackConf(ARROW_EXECUTION_ENABLED)
val ARROW_SPARKR_EXECUTION_ENABLED =
@@ -1611,6 +1791,7 @@ object SQLConf {
"4. gapply " +
"The following data types are unsupported: " +
"FloatType, BinaryType, ArrayType, StructType and MapType.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -1618,6 +1799,7 @@ object SQLConf {
buildConf("spark.sql.execution.arrow.fallback.enabled")
.doc("(Deprecated since Spark 3.0, please set " +
"'spark.sql.execution.arrow.pyspark.fallback.enabled'.)")
+ .version("2.4.0")
.booleanConf
.createWithDefault(true)
@@ -1625,12 +1807,14 @@ object SQLConf {
buildConf("spark.sql.execution.arrow.pyspark.fallback.enabled")
.doc(s"When true, optimizations enabled by '${ARROW_PYSPARK_EXECUTION_ENABLED.key}' will " +
"fallback automatically to non-optimized implementations if an error occurs.")
+ .version("3.0.0")
.fallbackConf(ARROW_FALLBACK_ENABLED)
val ARROW_EXECUTION_MAX_RECORDS_PER_BATCH =
buildConf("spark.sql.execution.arrow.maxRecordsPerBatch")
.doc("When using Apache Arrow, limit the maximum number of records that can be written " +
"to a single ArrowRecordBatch in memory. If set to zero or negative there is no limit.")
+ .version("2.3.0")
.intConf
.createWithDefault(10000)
@@ -1641,6 +1825,7 @@ object SQLConf {
s"set, the fallback is `${BUFFER_SIZE.key}`. Note that Pandas execution requires more " +
"than 4 bytes. Lowering this value could make small Pandas UDF batch iterated and " +
"pipelined; however, it might degrade performance. See SPARK-27870.")
+ .version("3.0.0")
.fallbackConf(BUFFER_SIZE)
val PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME =
@@ -1650,6 +1835,7 @@ object SQLConf {
"to use position if not. When false, a grouped map Pandas UDF will assign columns from " +
"the returned Pandas DataFrame based on position, regardless of column label type. " +
"This configuration will be deprecated in future releases.")
+ .version("2.4.1")
.booleanConf
.createWithDefault(true)
@@ -1660,6 +1846,7 @@ object SQLConf {
"Pandas.Series to Arrow array during serialization. Arrow will raise errors " +
"when detecting unsafe type conversion like overflow. When false, disabling Arrow's type " +
"check and do type conversions anyway. This config only works for Arrow 0.11.0+.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -1673,6 +1860,7 @@ object SQLConf {
" column names of the left node are distinct. If all the conditions are met, the" +
" rule will replace the except operation with a Filter by flipping the filter" +
" condition(s) of the right node.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(true)
@@ -1683,6 +1871,7 @@ object SQLConf {
"happens according to Hive behavior and SQL ANSI 2011 specification, ie. rounding the " +
"decimal part of the result if an exact representation is not possible. Otherwise, NULL " +
"is returned in those cases, as previously.")
+ .version("2.3.1")
.booleanConf
.createWithDefault(true)
@@ -1692,6 +1881,7 @@ object SQLConf {
.doc("When integral literal is used in decimal operations, pick a minimum precision " +
"required by the literal if this config is true, to make the resulting precision and/or " +
"scale smaller. This can reduce the possibility of precision lose and/or overflow.")
+ .version("2.3.3")
.booleanConf
.createWithDefault(true)
@@ -1701,6 +1891,7 @@ object SQLConf {
"information. The values of options whose names that match this regex will be redacted " +
"in the explain output. This redaction is applied on top of the global redaction " +
s"configuration defined by ${SECRET_REDACTION_PATTERN.key}.")
+ .version("2.2.2")
.regexConf
.createWithDefault("(?i)url".r)
@@ -1710,17 +1901,20 @@ object SQLConf {
"information. When this regex matches a string part, that string part is replaced by a " +
"dummy value. This is currently used to redact the output of SQL explain commands. " +
"When this conf is not set, the value from `spark.redaction.string.regex` is used.")
+ .version("2.3.0")
.fallbackConf(org.apache.spark.internal.config.STRING_REDACTION_PATTERN)
val CONCAT_BINARY_AS_STRING = buildConf("spark.sql.function.concatBinaryAsString")
.doc("When this option is set to false and all inputs are binary, `functions.concat` returns " +
"an output as binary. Otherwise, it returns as a string.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(false)
val ELT_OUTPUT_AS_STRING = buildConf("spark.sql.function.eltOutputAsString")
.doc("When this option is set to false and all inputs are binary, `elt` returns " +
"an output as binary. Otherwise, it returns as a string.")
+ .version("2.3.0")
.booleanConf
.createWithDefault(false)
@@ -1731,6 +1925,7 @@ object SQLConf {
"user-specified schema. If the validation fails, a runtime exception is thrown. " +
"When this option is set to false, the partition column value will be converted to null " +
"if it can not be casted to corresponding user-specified schema.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -1738,6 +1933,7 @@ object SQLConf {
buildConf("spark.sql.streaming.continuous.epochBacklogQueueSize")
.doc("The max number of entries to be stored in queue to wait for late epochs. " +
"If this parameter is exceeded by the size of the queue, stream will stop with an error.")
+ .version("3.0.0")
.intConf
.createWithDefault(10000)
@@ -1746,6 +1942,7 @@ object SQLConf {
.internal()
.doc("The size (measured in number of rows) of the queue used in continuous execution to" +
" buffer the results of a ContinuousDataReader.")
+ .version("2.3.0")
.intConf
.createWithDefault(1024)
@@ -1754,6 +1951,7 @@ object SQLConf {
.internal()
.doc("The interval at which continuous execution readers will poll to check whether" +
" the epoch has advanced on the driver.")
+ .version("2.3.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefault(100)
@@ -1762,12 +1960,14 @@ object SQLConf {
.doc("A comma-separated list of data source short names or fully qualified data source " +
"implementation class names for which Data Source V2 code path is disabled. These data " +
"sources will fallback to Data Source V1 code path.")
+ .version("3.0.0")
.stringConf
.createWithDefault("avro,csv,json,kafka,orc,parquet,text")
val DISABLED_V2_STREAMING_WRITERS = buildConf("spark.sql.streaming.disabledV2Writers")
.doc("A comma-separated list of fully qualified data source register class names for which" +
" StreamWriteSupport is disabled. Writes to these sources will fall back to the V1 Sinks.")
+ .version("2.3.1")
.stringConf
.createWithDefault("")
@@ -1778,6 +1978,7 @@ object SQLConf {
"A comma-separated list of fully qualified data source register class names for which " +
"MicroBatchReadSupport is disabled. Reads from these sources will fall back to the " +
"V1 Sources.")
+ .version("2.4.0")
.stringConf
.createWithDefault("")
@@ -1798,6 +1999,7 @@ object SQLConf {
"(which takes precedence over this setting), e.g. " +
"dataframe.write.option(\"partitionOverwriteMode\", \"dynamic\").save(path)."
)
+ .version("2.3.0")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(PartitionOverwriteMode.values.map(_.toString))
@@ -1822,6 +2024,7 @@ object SQLConf {
"in type coercion, e.g. converting `double` to `int` or `decimal` to `double` is " +
"not allowed."
)
+ .version("3.0.0")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(StoreAssignmentPolicy.values.map(_.toString))
@@ -1832,6 +2035,7 @@ object SQLConf {
"throw a runtime exception if an overflow occurs in any operation on integral/decimal " +
"field. 2. Spark will forbid using the reserved keywords of ANSI SQL as identifiers in " +
"the SQL parser.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -1844,6 +2048,7 @@ object SQLConf {
"issues. Turn on this config to insert a local sort before actually doing repartition " +
"to generate consistent repartition results. The performance of repartition() may go " +
"down since we insert extra local sort before it.")
+ .version("2.1.4")
.booleanConf
.createWithDefault(true)
@@ -1854,6 +2059,7 @@ object SQLConf {
"satisfying a query. This optimization allows columnar file format readers to avoid " +
"reading unnecessary nested column data. Currently Parquet and ORC are the " +
"data sources that implement this optimization.")
+ .version("2.4.1")
.booleanConf
.createWithDefault(true)
@@ -1864,6 +2070,7 @@ object SQLConf {
"containing `dots` to data sources. Currently, Parquet implements both optimizations " +
"while ORC only supports predicates for names containing `dots`. The other data sources" +
"don't support this feature yet.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -1873,6 +2080,7 @@ object SQLConf {
.doc("Prune nested fields from object serialization operator which are unnecessary in " +
"satisfying a query. This optimization allows object serializers to avoid " +
"executing unnecessary nested expressions.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -1883,6 +2091,7 @@ object SQLConf {
"satisfying a query. Note that this optimization doesn't prune nested fields from " +
"physical data source scanning. For pruning nested fields from scanning, please use " +
"`spark.sql.optimizer.nestedSchemaPruning.enabled` config.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -1892,6 +2101,7 @@ object SQLConf {
.doc("In SQL queries with a SORT followed by a LIMIT like " +
"'SELECT x FROM t ORDER BY y LIMIT m', if m is under this threshold, do a top-K sort" +
" in memory, otherwise do a global sort which spills to disk if necessary.")
+ .version("2.4.0")
.intConf
.createWithDefault(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH)
@@ -1907,6 +2117,7 @@ object SQLConf {
.internal()
.doc("If it is set to true, column names of the requested schema are passed to CSV parser. " +
"Other column values can be ignored during parsing even if they are malformed.")
+ .version("2.4.0")
.booleanConf
.createWithDefault(true)
@@ -1917,6 +2128,7 @@ object SQLConf {
"Jupyter, the HTML table (generated by _repr_html_) will be returned. For plain Python " +
"REPL, the returned outputs are formatted like dataframe.show(). In SparkR, the returned " +
"outputs are showed similar to R data.frame would.")
+ .version("2.4.0")
.booleanConf
.createWithDefault(false)
@@ -1925,12 +2137,14 @@ object SQLConf {
s"effect when ${REPL_EAGER_EVAL_ENABLED.key} is set to true. The valid range of this " +
"config is from 0 to (Int.MaxValue - 1), so the invalid config like negative and " +
"greater than (Int.MaxValue - 1) will be normalized to 0 and (Int.MaxValue - 1).")
+ .version("2.4.0")
.intConf
.createWithDefault(20)
val REPL_EAGER_EVAL_TRUNCATE = buildConf("spark.sql.repl.eagerEval.truncate")
.doc("The max number of characters for each cell that is returned by eager evaluation. " +
s"This only takes effect when ${REPL_EAGER_EVAL_ENABLED.key} is set to true.")
+ .version("2.4.0")
.intConf
.createWithDefault(20)
@@ -1941,6 +2155,7 @@ object SQLConf {
"by the fast hash aggregate product operator. The bit is not for actual value, " +
"but the actual numBuckets is determined by loadFactor " +
"(e.g: default bit value 16 , the actual numBuckets is ((1 << 16) / 0.5).")
+ .version("2.4.0")
.intConf
.checkValue(bit => bit >= 10 && bit <= 30, "The bit value must be in [10, 30].")
.createWithDefault(16)
@@ -1948,6 +2163,7 @@ object SQLConf {
val AVRO_COMPRESSION_CODEC = buildConf("spark.sql.avro.compression.codec")
.doc("Compression codec used in writing of AVRO files. Supported codecs: " +
"uncompressed, deflate, snappy, bzip2 and xz. Default codec is snappy.")
+ .version("2.4.0")
.stringConf
.checkValues(Set("uncompressed", "deflate", "snappy", "bzip2", "xz"))
.createWithDefault("snappy")
@@ -1956,6 +2172,7 @@ object SQLConf {
.doc("Compression level for the deflate codec used in writing of AVRO files. " +
"Valid value must be in the range of from 1 to 9 inclusive or -1. " +
"The default value is -1 which corresponds to 6 level in the current implementation.")
+ .version("2.4.0")
.intConf
.checkValues((1 to 9).toSet + Deflater.DEFAULT_COMPRESSION)
.createWithDefault(Deflater.DEFAULT_COMPRESSION)
@@ -1964,6 +2181,7 @@ object SQLConf {
.internal()
.doc(s"If it is set to false, or ${ANSI_ENABLED.key} is true, then size of null returns " +
"null. Otherwise, it returns -1, which was inherited from Hive.")
+ .version("2.4.0")
.booleanConf
.createWithDefault(true)
@@ -1972,6 +2190,7 @@ object SQLConf {
.internal()
.doc("If it is set to true, the data source provider com.databricks.spark.avro is mapped " +
"to the built-in but external Avro data source module for backward compatibility.")
+ .version("2.4.0")
.booleanConf
.createWithDefault(true)
@@ -1982,6 +2201,7 @@ object SQLConf {
"set operations are performed from left to right as they appear in the query. When set " +
"to false and order of evaluation is not specified by parentheses, INTERSECT operations " +
"are performed before any UNION, EXCEPT and MINUS operations.")
+ .version("2.4.0")
.booleanConf
.createWithDefault(false)
@@ -1990,6 +2210,7 @@ object SQLConf {
.internal()
.doc("When set to true, a literal with an exponent (e.g. 1E-30) would be parsed " +
"as Decimal rather than Double.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -1999,6 +2220,7 @@ object SQLConf {
.doc("When set to true, negative scale of Decimal type is allowed. For example, " +
"the type of number 1E10BD under legacy mode is DecimalType(2, -9), but is " +
"Decimal(11, 0) in non legacy mode.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2007,6 +2229,7 @@ object SQLConf {
.internal()
.doc("When set to true, CREATE TABLE syntax without a provider will use hive " +
s"instead of the value of ${DEFAULT_DATA_SOURCE_NAME.key}.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2015,14 +2238,16 @@ object SQLConf {
.internal()
.doc("When true, the bucketed table scan will list files during planning to figure out the " +
"output ordering, which is expensive and may make the planning quite slow.")
- .booleanConf
- .createWithDefault(false)
+ .version("3.0.0")
+ .booleanConf
+ .createWithDefault(false)
val LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE =
buildConf("spark.sql.legacy.parser.havingWithoutGroupByAsWhere")
.internal()
.doc("If it is set to true, the parser will treat HAVING without GROUP BY as a normal " +
"WHERE, which does not follow SQL standard.")
+ .version("2.4.1")
.booleanConf
.createWithDefault(false)
@@ -2030,7 +2255,8 @@ object SQLConf {
buildConf("spark.sql.legacy.json.allowEmptyString.enabled")
.internal()
.doc("When set to true, the parser of JSON data source treats empty strings as null for " +
- "some data types such as `IntegerType`.")
+ "some data types such as `IntegerType`.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2040,6 +2266,7 @@ object SQLConf {
.doc("When set to true, Spark returns an empty collection with `StringType` as element " +
"type if the `array`/`map` function is called without any parameters. Otherwise, Spark " +
"returns an empty collection with `NullType` as element type.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2048,6 +2275,7 @@ object SQLConf {
.internal()
.doc("When set to true, user is allowed to use org.apache.spark.sql.functions." +
"udf(f: AnyRef, dataType: DataType). Otherwise, an exception will be thrown at runtime.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2056,6 +2284,7 @@ object SQLConf {
.internal()
.doc("When set to true, TRUNCATE TABLE command will not try to set back original " +
"permission and ACLs when re-creating the table/partition paths.")
+ .version("2.4.6")
.booleanConf
.createWithDefault(false)
@@ -2065,6 +2294,7 @@ object SQLConf {
.doc("When set to true, the key attribute resulted from running `Dataset.groupByKey` " +
"for non-struct key type, will be named as `value`, following the behavior of Spark " +
"version 2.4 and earlier.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2072,6 +2302,7 @@ object SQLConf {
.doc("Maximum number of fields of sequence-like entries can be converted to strings " +
"in debug output. Any elements beyond the limit will be dropped and replaced by a" +
""" "... N more fields" placeholder.""")
+ .version("3.0.0")
.intConf
.createWithDefault(25)
@@ -2080,6 +2311,7 @@ object SQLConf {
"longer, further output will be truncated. The default setting always generates a full " +
"plan. Set this to a lower value such as 8k if plan strings are taking up too much " +
"memory or are causing OutOfMemory errors in the driver or UI processes.")
+ .version("3.0.0")
.bytesConf(ByteUnit.BYTE)
.checkValue(i => i >= 0 && i <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH, "Invalid " +
"value for 'spark.sql.maxPlanStringLength'. Length must be a valid string length " +
@@ -2091,6 +2323,7 @@ object SQLConf {
.internal()
.doc("If it is set to true, SET command will fail when the key is registered as " +
"a SparkConf entry.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -2099,6 +2332,7 @@ object SQLConf {
"java.time.LocalDate classes of Java 8 API are used as external types for " +
"Catalyst's TimestampType and DateType. If it is set to false, java.sql.Timestamp " +
"and java.sql.Date are used for the same purpose.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2106,6 +2340,7 @@ object SQLConf {
.doc("The max length of a file that can be read by the binary file data source. " +
"Spark will fail fast and not attempt to read the file if its length exceeds this value. " +
"The theoretical max is Int.MaxValue, though VMs might implement a smaller max.")
+ .version("3.0.0")
.internal()
.intConf
.createWithDefault(Int.MaxValue)
@@ -2115,12 +2350,14 @@ object SQLConf {
.internal()
.doc("If it is set to true, date/timestamp will cast to string in binary comparisons " +
"with String")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
val DEFAULT_CATALOG = buildConf("spark.sql.defaultCatalog")
.doc("Name of the default catalog. This will be the current catalog if users have not " +
"explicitly set the current catalog yet.")
+ .version("3.0.0")
.stringConf
.createWithDefault(SESSION_CATALOG_NAME)
@@ -2132,6 +2369,7 @@ object SQLConf {
s"be loaded by the $SESSION_CATALOG_NAME, this catalog must also return the table " +
s"metadata. To delegate operations to the $SESSION_CATALOG_NAME, implementations can " +
"extend 'CatalogExtension'.")
+ .version("3.0.0")
.stringConf
.createOptional
@@ -2144,6 +2382,7 @@ object SQLConf {
"MapFromEntries, StringToMap, MapConcat and TransformKeys. When EXCEPTION, the query " +
"fails if duplicated map keys are detected. When LAST_WIN, the map key that is inserted " +
"at last takes precedence.")
+ .version("3.0.0")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(MapKeyDedupPolicy.values.map(_.toString))
@@ -2152,6 +2391,7 @@ object SQLConf {
val LEGACY_LOOSE_UPCAST = buildConf("spark.sql.legacy.doLooseUpcast")
.internal()
.doc("When true, the upcast will be loose and allows string to atomic types.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2165,6 +2405,7 @@ object SQLConf {
"CORRECTED, inner CTE definitions take precedence. The default value is EXCEPTION, " +
"AnalysisException is thrown while name conflict is detected in nested CTE. This config " +
"will be removed in future versions and CORRECTED will be the only behavior.")
+ .version("3.0.0")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(LegacyBehaviorPolicy.values.map(_.toString))
@@ -2177,6 +2418,7 @@ object SQLConf {
"When set to CORRECTED, classes from java.time.* packages are used for the same purpose. " +
"The default value is EXCEPTION, RuntimeException is thrown when we will get different " +
"results.")
+ .version("3.0.0")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(LegacyBehaviorPolicy.values.map(_.toString))
@@ -2186,6 +2428,7 @@ object SQLConf {
buildConf("spark.sql.legacy.followThreeValuedLogicInArrayExists")
.internal()
.doc("When true, the ArrayExists will follow the three-valued boolean logic.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -2194,6 +2437,7 @@ object SQLConf {
.doc("A comma-delimited string config of the optional additional remote Maven mirror " +
"repositories. This is only used for downloading Hive jars in IsolatedClientLoader " +
"if the default Maven Central repo is unreachable.")
+ .version("3.0.0")
.stringConf
.createWithDefault(
"https://maven-central.storage-download.googleapis.com/maven2/")
@@ -2206,6 +2450,7 @@ object SQLConf {
"all interval units out of the specified range. If it is set to `false`, " +
"`ParseException` is thrown if the input does not match to the pattern " +
"defined by `from` and `to`.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2215,6 +2460,7 @@ object SQLConf {
.doc("When true, all database and table properties are not reserved and available for " +
"create/alter syntaxes. But please be aware that the reserved properties will be " +
"silently removed.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2223,6 +2469,7 @@ object SQLConf {
.internal()
.doc("When true, only a single file can be added using ADD FILE. If false, then users " +
"can add directory by passing directory path to ADD FILE.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2230,11 +2477,13 @@ object SQLConf {
buildConf("spark.sql.legacy.mssqlserver.numericMapping.enabled")
.internal()
.doc("When true, use legacy MySqlServer SMALLINT and REAL type mapping.")
+ .version("2.4.5")
.booleanConf
.createWithDefault(false)
val CSV_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.csv.filterPushdown.enabled")
.doc("When true, enable filter pushdown to CSV datasource.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(true)
@@ -2244,6 +2493,7 @@ object SQLConf {
.doc("The number of partitions to be handled in one turn when use " +
"`AlterTableAddPartitionCommand` to add partitions into table. The smaller " +
"batch size is, the less memory is required for the real handler, e.g. Hive Metastore.")
+ .version("3.0.0")
.intConf
.checkValue(_ > 0, "The value of spark.sql.addPartitionInBatch.size must be positive")
.createWithDefault(100)
@@ -2251,6 +2501,7 @@ object SQLConf {
val LEGACY_ALLOW_HASH_ON_MAPTYPE = buildConf("spark.sql.legacy.allowHashOnMapType")
.doc("When set to true, hash expressions can be applied on elements of MapType. Otherwise, " +
"an analysis exception will be thrown.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2263,6 +2514,7 @@ object SQLConf {
"a local date/timestamp in the source calendar, interpreting the resulted date/" +
"timestamp in the target calendar, and getting the number of micros/millis/days " +
"since the epoch 1970-01-01 00:00:00Z.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2275,6 +2527,7 @@ object SQLConf {
"a local date/timestamp in the source calendar, interpreting the resulted date/" +
"timestamp in the target calendar, and getting the number of micros/millis/days " +
"since the epoch 1970-01-01 00:00:00Z.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2287,6 +2540,7 @@ object SQLConf {
"a local date/timestamp in the source calendar, interpreting the resulted date/" +
"timestamp in the target calendar, and getting the number of micros/millis/days " +
"since the epoch 1970-01-01 00:00:00Z.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
@@ -2299,6 +2553,7 @@ object SQLConf {
"a local date/timestamp in the source calendar, interpreting the resulted date/" +
"timestamp in the target calendar, and getting the number of micros/millis/days " +
"since the epoch 1970-01-01 00:00:00Z.")
+ .version("3.0.0")
.booleanConf
.createWithDefault(false)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org