You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by dw...@apache.org on 2022/09/09 19:47:17 UTC

[iceberg-docs] branch main updated: Bring over latest common docs and docs config

This is an automated email from the ASF dual-hosted git repository.

dweeks pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-docs.git


The following commit(s) were added to refs/heads/main by this push:
     new 13dd0bfe Bring over latest common docs and docs config
     new d6f1ad88 Merge pull request #153 from samredai/update-landing-page
13dd0bfe is described below

commit 13dd0bfe880f3d53f7bf0038ab0303cf9627a44f
Author: samredai <43...@users.noreply.github.com>
AuthorDate: Fri Sep 9 12:34:53 2022 -0700

    Bring over latest common docs and docs config
---
 docs/config.toml                    | 11 +++++------
 landing-page/content/common/spec.md | 38 ++++++++++++++++++++++++++++++++-----
 2 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/docs/config.toml b/docs/config.toml
index 623bb6f0..02577574 100644
--- a/docs/config.toml
+++ b/docs/config.toml
@@ -9,7 +9,7 @@ theme= "iceberg-theme"
   siteType = "docs"
   search = true
   versions.iceberg = "" # This is populated by the github deploy workflow and is equal to the branch name
-  versions.nessie = "0.18.0"
+  versions.nessie = "0.20.0"
   latestVersions.iceberg = "0.14.0"  # This is used for the version badge on the "latest" site version
   BookSection='docs' # This determines which directory will inform the left navigation menu
   disableHome=true
@@ -31,13 +31,12 @@ home = [ "HTML", "RSS", "SearchIndex" ]
     { name = "0.12.1", pre = "relative", url = "../0.12.1", weight = 1000 }
   ]
   topnav = [
-    { name = "Quickstart", url = "/spark-quickstart", weight = 100 },
-    { name = "Docs", url = "/docs/latest", weight = 200 },
+    { name = "Quickstart", pre = "relative", url = "../../spark-quickstart", weight = 100 },
+    { name = "Docs", pre = "relative", url = "../../docs/latest", weight = 200 },
     { name = "Releases", pre = "relative", url = "../../releases", weight = 600 },
-    { name = "Roadmap", pre = "relative", url = "../../roadmap", weight = 997 },
     { name = "Blogs", pre = "relative", url = "../../blogs", weight = 998 },
     { name = "Talks", pre = "relative", url = "../../talks", weight = 999 },
-    { name = "Vendors", pre = "relative", url = "../../vendors", weight = 1000 },
+    { name = "Roadmap", pre = "relative", url = "../../roadmap", weight = 997 },
     { name = "Project", weight = 1100 },
     { name = "Community", parent = "Project", pre = "relative", url = "../../community", weight = 100 },
     { name = "Spec", parent = "Project", pre = "relative", url = "../../spec", weight = 200 },
@@ -60,7 +59,7 @@ home = [ "HTML", "RSS", "SearchIndex" ]
     { name = "Trino", identifier = "_trino", weight = 500, url = "https://trino.io/docs/current/connector/iceberg.html" },
     { name = "Presto", identifier = "_presto", weight = 600, url = "https://prestodb.io/docs/current/connector/iceberg.html" },
     { name = "Dremio", identifier = "_dremio", weight = 700, url = "https://docs.dremio.com/data-formats/apache-iceberg/" },
-    { name = "StarRocks", identifier = "_starrocks", weight = 701, url = "https://docs.starrocks.com/en-us/latest/using_starrocks/External_table#apache-iceberg-external-table" },
+    { name = "StarRocks", identifier = "_starrocks", weight = 701, url = "https://docs.starrocks.com/en-us/main/using_starrocks/External_table#apache-iceberg-external-table" },
     { name = "Amazon Athena", identifier = "_athena", weight = 800, url = "https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg.html" },
     { name = "Amazon EMR", identifier = "_emr", weight = 900, url = "https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-iceberg-use-cluster.html" },
     { name = "Impala", identifier = "_impala", weight = 1000, url = "https://impala.apache.org/docs/build/html/topics/impala_iceberg.html" },
diff --git a/landing-page/content/common/spec.md b/landing-page/content/common/spec.md
index 1154cb74..2ce1e6c9 100644
--- a/landing-page/content/common/spec.md
+++ b/landing-page/content/common/spec.md
@@ -665,9 +665,37 @@ Table metadata consists of the following fields:
 | _optional_ | _required_ | **`sort-orders`**| A list of sort orders, stored as full sort order objects. |
 | _optional_ | _required_ | **`default-sort-order-id`**| Default sort order id of the table. Note that this could be used by writers, but is not used when reading because reads use the specs stored in manifest files. |
 |            | _optional_ | **`refs`** | A map of snapshot references. The map keys are the unique snapshot reference names in the table, and the map values are snapshot reference objects. There is always a `main` branch reference pointing to the `current-snapshot-id` even if the `refs` map is null. |
+| _optional_ | _optional_ | **`statistics`** | A list (optional) of [table statistics](#table-statistics). |
 
 For serialization details, see Appendix C.
 
+#### Table statistics
+
+Table statistics files are valid [Puffin files](../puffin-spec). Statistics are informational. A reader can choose to
+ignore statistics information. Statistics support is not required to read the table correctly. A table can contain
+many statistics files associated with different table snapshots.
+
+Each entry in the `statistics` table metadata field is a struct that describes one statistics file, with the following fields:
+
+| v1 | v2 | Field name | Type | Description |
+|----|----|------------|------|-------------|
+| _required_ | _required_ | **`snapshot-id`** | `long` | ID of the Iceberg table's snapshot the statistics were computed from. |
+| _required_ | _required_ | **`statistics-path`** | `string` | Path of the statistics file. See [Puffin file format](../puffin-spec). |
+| _required_ | _required_ | **`file-size-in-bytes`** | `long` | Size of the statistics file. |
+| _required_ | _required_ | **`file-footer-size-in-bytes`** | `long` | Total size of the statistics file's footer (not the footer payload size). See [Puffin file format](../puffin-spec) for footer definition. |
+| _optional_ | _optional_ | **`key-metadata`** | `string` | Base64-encoded implementation-specific key metadata for encryption. |
+| _required_ | _required_ | **`blob-metadata`** | `list<blob metadata>` (see below) | A list of the blob metadata for statistics contained in the file with structure described below. |
+
+Blob metadata is a struct with the following fields:
+
+| v1 | v2 | Field name | Type | Description |
+|----|----|------------|------|-------------|
+| _required_ | _required_ | **`type`** | `string` | Type of the blob. Matches Blob type in the Puffin file. |
+| _required_ | _required_ | **`snapshot-id`** | `long` | ID of the Iceberg table's snapshot the blob was computed from. |
+| _required_ | _required_ | **`sequence-number`** | `long` | Sequence number of the Iceberg table's snapshot the blob was computed from. |
+| _required_ | _required_ | **`fields`** | `list<integer>` | Ordered list of fields, given by field ID, on which the statistic was calculated. |
+| _optional_ | _optional_ | **`properties`** | `map<string, string>` | Additional properties associated with the statistic. Subset of Blob properties in the Puffin file. |
+
 
 #### Commit Conflict Resolution and Retry
 
@@ -865,7 +893,7 @@ Note that the string map case is for maps where the key type is a string. Using
 
 Values should be stored in Parquet using the types and logical type annotations in the table below. Column IDs are required.
 
-Lists must use the [3-level representation](https://github.com/apache/parquet-format/blob/master/LogicalTypes#lists).
+Lists must use the [3-level representation](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists).
 
 | Type               | Parquet physical type                                              | Logical type                                | Notes                                                          |
 |--------------------|--------------------------------------------------------------------|---------------------------------------------|----------------------------------------------------------------|
@@ -1134,15 +1162,15 @@ This serialization scheme is for storing single values as individual binary valu
 | **`long`**         | **`JSON long`**                           | `34`                                       | |
 | **`float`**        | **`JSON number`**                         | `1.0`                                      | |
 | **`double`**       | **`JSON number`**                         | `1.0`                                      | |
-| **`decimal(P,S)`** | **`JSON number`**                         | `14.20`                                    | Stores the decimal as a number with S places after the decimal |
+| **`decimal(P,S)`** | **`JSON string`**                         | `"14.20"`, `"2E+20"`                       | Stores the string representation of the decimal value. For values with a positive scale, the number of digits to the right of the decimal point indicates the scale; for values with a negative scale, scientific notation is used and the exponent must equal the negated scale |
 | **`date`**         | **`JSON string`**                         | `"2017-11-16"`                             | Stores ISO-8601 standard date |
 | **`time`**         | **`JSON string`**                         | `"22:31:08.123456"`                        | Stores ISO-8601 standard time with microsecond precision |
 | **`timestamp`**    | **`JSON string`**                         | `"2017-11-16T22:31:08.123456"`             | Stores ISO-8601 standard timestamp with microsecond precision; must not include a zone offset |
-| **`timestamptz`**  | **`JSON string`**                         | `"2017-11-16T22:31:08.123456-07:00"`       | Stores ISO-8601 standard timestamp with microsecond precision; must include a zone offset |
+| **`timestamptz`**  | **`JSON string`**                         | `"2017-11-16T22:31:08.123456+00:00"`       | Stores ISO-8601 standard timestamp with microsecond precision; must include a zone offset and it must be '+00:00' |
 | **`string`**       | **`JSON string`**                         | `"iceberg"`                                | |
 | **`uuid`**         | **`JSON string`**                         | `"f79c3e09-677c-4bbd-a479-3f349cb785e7"`   | Stores the lowercase uuid string |
-| **`fixed(L)`**     | **`JSON string`**                         | `"0x00010203"`                             | Stored as a hexadecimal string, prefixed by `0x` |
-| **`binary`**       | **`JSON string`**                         | `"0x00010203"`                             | Stored as a hexadecimal string, prefixed by `0x` |
+| **`fixed(L)`**     | **`JSON string`**                         | `"000102ff"`                               | Stored as a hexadecimal string |
+| **`binary`**       | **`JSON string`**                         | `"000102ff"`                               | Stored as a hexadecimal string |
 | **`struct`**       | **`JSON object by field ID`**             | `{"1": 1, "2": "bar"}`                     | Stores struct fields using the field ID as the JSON field name; field values are stored using this JSON single-value format |
 | **`list`**         | **`JSON array of values`**                | `[1, 2, 3]`                                | Stores a JSON array of values that are serialized using this JSON single-value format |
 | **`map`**          | **`JSON object of key and value arrays`** | `{ "keys": ["a", "b"], "values": [1, 2] }` | Stores arrays of keys and values; individual keys and values are serialized using this JSON single-value format |