You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by dw...@apache.org on 2022/09/09 19:47:17 UTC
[iceberg-docs] branch main updated: Bring over latest common docs and docs config
This is an automated email from the ASF dual-hosted git repository.
dweeks pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-docs.git
The following commit(s) were added to refs/heads/main by this push:
new 13dd0bfe Bring over latest common docs and docs config
new d6f1ad88 Merge pull request #153 from samredai/update-landing-page
13dd0bfe is described below
commit 13dd0bfe880f3d53f7bf0038ab0303cf9627a44f
Author: samredai <43...@users.noreply.github.com>
AuthorDate: Fri Sep 9 12:34:53 2022 -0700
Bring over latest common docs and docs config
---
docs/config.toml | 11 +++++------
landing-page/content/common/spec.md | 38 ++++++++++++++++++++++++++++++++-----
2 files changed, 38 insertions(+), 11 deletions(-)
diff --git a/docs/config.toml b/docs/config.toml
index 623bb6f0..02577574 100644
--- a/docs/config.toml
+++ b/docs/config.toml
@@ -9,7 +9,7 @@ theme= "iceberg-theme"
siteType = "docs"
search = true
versions.iceberg = "" # This is populated by the github deploy workflow and is equal to the branch name
- versions.nessie = "0.18.0"
+ versions.nessie = "0.20.0"
latestVersions.iceberg = "0.14.0" # This is used for the version badge on the "latest" site version
BookSection='docs' # This determines which directory will inform the left navigation menu
disableHome=true
@@ -31,13 +31,12 @@ home = [ "HTML", "RSS", "SearchIndex" ]
{ name = "0.12.1", pre = "relative", url = "../0.12.1", weight = 1000 }
]
topnav = [
- { name = "Quickstart", url = "/spark-quickstart", weight = 100 },
- { name = "Docs", url = "/docs/latest", weight = 200 },
+ { name = "Quickstart", pre = "relative", url = "../../spark-quickstart", weight = 100 },
+ { name = "Docs", pre = "relative", url = "../../docs/latest", weight = 200 },
{ name = "Releases", pre = "relative", url = "../../releases", weight = 600 },
- { name = "Roadmap", pre = "relative", url = "../../roadmap", weight = 997 },
{ name = "Blogs", pre = "relative", url = "../../blogs", weight = 998 },
{ name = "Talks", pre = "relative", url = "../../talks", weight = 999 },
- { name = "Vendors", pre = "relative", url = "../../vendors", weight = 1000 },
+ { name = "Roadmap", pre = "relative", url = "../../roadmap", weight = 997 },
{ name = "Project", weight = 1100 },
{ name = "Community", parent = "Project", pre = "relative", url = "../../community", weight = 100 },
{ name = "Spec", parent = "Project", pre = "relative", url = "../../spec", weight = 200 },
@@ -60,7 +59,7 @@ home = [ "HTML", "RSS", "SearchIndex" ]
{ name = "Trino", identifier = "_trino", weight = 500, url = "https://trino.io/docs/current/connector/iceberg.html" },
{ name = "Presto", identifier = "_presto", weight = 600, url = "https://prestodb.io/docs/current/connector/iceberg.html" },
{ name = "Dremio", identifier = "_dremio", weight = 700, url = "https://docs.dremio.com/data-formats/apache-iceberg/" },
- { name = "StarRocks", identifier = "_starrocks", weight = 701, url = "https://docs.starrocks.com/en-us/latest/using_starrocks/External_table#apache-iceberg-external-table" },
+ { name = "StarRocks", identifier = "_starrocks", weight = 701, url = "https://docs.starrocks.com/en-us/main/using_starrocks/External_table#apache-iceberg-external-table" },
{ name = "Amazon Athena", identifier = "_athena", weight = 800, url = "https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg.html" },
{ name = "Amazon EMR", identifier = "_emr", weight = 900, url = "https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-iceberg-use-cluster.html" },
{ name = "Impala", identifier = "_impala", weight = 1000, url = "https://impala.apache.org/docs/build/html/topics/impala_iceberg.html" },
diff --git a/landing-page/content/common/spec.md b/landing-page/content/common/spec.md
index 1154cb74..2ce1e6c9 100644
--- a/landing-page/content/common/spec.md
+++ b/landing-page/content/common/spec.md
@@ -665,9 +665,37 @@ Table metadata consists of the following fields:
| _optional_ | _required_ | **`sort-orders`**| A list of sort orders, stored as full sort order objects. |
| _optional_ | _required_ | **`default-sort-order-id`**| Default sort order id of the table. Note that this could be used by writers, but is not used when reading because reads use the specs stored in manifest files. |
| | _optional_ | **`refs`** | A map of snapshot references. The map keys are the unique snapshot reference names in the table, and the map values are snapshot reference objects. There is always a `main` branch reference pointing to the `current-snapshot-id` even if the `refs` map is null. |
+| _optional_ | _optional_ | **`statistics`** | A list (optional) of [table statistics](#table-statistics). |
For serialization details, see Appendix C.
+#### Table statistics
+
+Table statistics files are valid [Puffin files](../puffin-spec). Statistics are informational. A reader can choose to
+ignore statistics information. Statistics support is not required to read the table correctly. A table can contain
+many statistics files associated with different table snapshots.
+
+The metadata for each statistics file, stored in the `statistics` table metadata field, is a struct with the following fields:
+
+| v1 | v2 | Field name | Type | Description |
+|----|----|------------|------|-------------|
+| _required_ | _required_ | **`snapshot-id`** | `string` | ID of the Iceberg table's snapshot the statistics were computed from. |
+| _required_ | _required_ | **`statistics-path`** | `string` | Path of the statistics file. See [Puffin file format](../puffin-spec). |
+| _required_ | _required_ | **`file-size-in-bytes`** | `long` | Size of the statistics file. |
+| _required_ | _required_ | **`file-footer-size-in-bytes`** | `long` | Total size of the statistics file's footer (not the footer payload size). See [Puffin file format](../puffin-spec) for footer definition. |
+| _optional_ | _optional_ | **`key-metadata`** | `string` | Base64-encoded implementation-specific key metadata for encryption. |
+| _required_ | _required_ | **`blob-metadata`** | `list<blob metadata>` (see below) | A list of the blob metadata for statistics contained in the file with structure described below. |
+
+Blob metadata is a struct with the following fields:
+
+| v1 | v2 | Field name | Type | Description |
+|----|----|------------|------|-------------|
+| _required_ | _required_ | **`type`** | `string` | Type of the blob. Matches Blob type in the Puffin file. |
+| _required_ | _required_ | **`snapshot-id`** | `long` | ID of the Iceberg table's snapshot the blob was computed from. |
+| _required_ | _required_ | **`sequence-number`** | `long` | Sequence number of the Iceberg table's snapshot the blob was computed from. |
+| _required_ | _required_ | **`fields`** | `list<integer>` | Ordered list of fields, given by field ID, on which the statistic was calculated. |
+| _optional_ | _optional_ | **`properties`** | `map<string, string>` | Additional properties associated with the statistic. Subset of Blob properties in the Puffin file. |
+
#### Commit Conflict Resolution and Retry
@@ -865,7 +893,7 @@ Note that the string map case is for maps where the key type is a string. Using
Values should be stored in Parquet using the types and logical type annotations in the table below. Column IDs are required.
-Lists must use the [3-level representation](https://github.com/apache/parquet-format/blob/master/LogicalTypes#lists).
+Lists must use the [3-level representation](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists).
| Type | Parquet physical type | Logical type | Notes |
|--------------------|--------------------------------------------------------------------|---------------------------------------------|----------------------------------------------------------------|
@@ -1134,15 +1162,15 @@ This serialization scheme is for storing single values as individual binary valu
| **`long`** | **`JSON long`** | `34` | |
| **`float`** | **`JSON number`** | `1.0` | |
| **`double`** | **`JSON number`** | `1.0` | |
-| **`decimal(P,S)`** | **`JSON number`** | `14.20` | Stores the decimal as a number with S places after the decimal |
+| **`decimal(P,S)`** | **`JSON string`** | `"14.20"`, `"2E+20"` | Stores the string representation of the decimal value. For values with a positive scale, the number of digits to the right of the decimal point indicates the scale; for values with a negative scale, scientific notation is used and the exponent must equal the negated scale |
| **`date`** | **`JSON string`** | `"2017-11-16"` | Stores ISO-8601 standard date |
| **`time`** | **`JSON string`** | `"22:31:08.123456"` | Stores ISO-8601 standard time with microsecond precision |
| **`timestamp`** | **`JSON string`** | `"2017-11-16T22:31:08.123456"` | Stores ISO-8601 standard timestamp with microsecond precision; must not include a zone offset |
-| **`timestamptz`** | **`JSON string`** | `"2017-11-16T22:31:08.123456-07:00"` | Stores ISO-8601 standard timestamp with microsecond precision; must include a zone offset |
+| **`timestamptz`** | **`JSON string`** | `"2017-11-16T22:31:08.123456+00:00"` | Stores ISO-8601 standard timestamp with microsecond precision; must include a zone offset and it must be '+00:00' |
| **`string`** | **`JSON string`** | `"iceberg"` | |
| **`uuid`** | **`JSON string`** | `"f79c3e09-677c-4bbd-a479-3f349cb785e7"` | Stores the lowercase uuid string |
-| **`fixed(L)`** | **`JSON string`** | `"0x00010203"` | Stored as a hexadecimal string, prefixed by `0x` |
-| **`binary`** | **`JSON string`** | `"0x00010203"` | Stored as a hexadecimal string, prefixed by `0x` |
+| **`fixed(L)`** | **`JSON string`** | `"000102ff"` | Stored as a hexadecimal string |
+| **`binary`** | **`JSON string`** | `"000102ff"` | Stored as a hexadecimal string |
| **`struct`** | **`JSON object by field ID`** | `{"1": 1, "2": "bar"}` | Stores struct fields using the field ID as the JSON field name; field values are stored using this JSON single-value format |
| **`list`** | **`JSON array of values`** | `[1, 2, 3]` | Stores a JSON array of values that are serialized using this JSON single-value format |
| **`map`** | **`JSON object of key and value arrays`** | `{ "keys": ["a", "b"], "values": [1, 2] }` | Stores arrays of keys and values; individual keys and values are serialized using this JSON single-value format |