You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by am...@apache.org on 2023/05/04 22:18:42 UTC

[iceberg] branch master updated: Spec: Update View spec to reflect that schema is defined at the version level and is required (#7485)

This is an automated email from the ASF dual-hosted git repository.

amoghj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new d8142d1035 Spec: Update View spec to reflect that schema is defined at the version level and is required (#7485)
d8142d1035 is described below

commit d8142d1035b015d264dfbb3492e0049530c7b5e5
Author: Amogh Jahagirdar <ja...@amazon.com>
AuthorDate: Thu May 4 15:18:36 2023 -0700

    Spec: Update View spec to reflect that schema is defined at the version level and is required (#7485)
---
 format/view-spec.md | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/format/view-spec.md b/format/view-spec.md
index efd46c6faf..4081cba6ca 100644
--- a/format/view-spec.md
+++ b/format/view-spec.md
@@ -60,8 +60,8 @@ The view version metadata file has the following fields:
 |-------------|----------------------|-------------|
 | _required_  | `format-version`     | An integer version number for the view format; must be 1 |
 | _required_  | `location`           | The view's base location; used to create metadata file locations |
-| _optional_  | `current-schema-id`  | ID of the current schema of the view, if known |
-| _optional_  | `schemas`            | A list of known schemas |
+| _required_  | `current-schema-id`  | ID of the current schema of the view |
+| _required_  | `schemas`            | A list of known schemas |
 | _required_  | `current-version-id` | ID of the current version of the view (`version-id`) |
 | _required_  | `versions`           | A list of known [versions](#versions) of the view [1] |
 | _required_  | `version-log`        | A list of [version log](#version-log) entries with the timestamp and `version-id` for every change to `current-version-id` |
@@ -75,12 +75,13 @@ Notes:
 
 Each version in `versions` is a struct with the following fields:
 
-| Requirement | Field name        | Description |
-|-------------|-------------------|-------------|
-| _required_  | `version-id`      | ID for the version |
-| _required_  | `timestamp-ms`    | Timestamp when the version was created (ms from epoch) |
+| Requirement | Field name        | Description                                                              |
+|-------------|-------------------|--------------------------------------------------------------------------|
+| _required_  | `version-id`      | ID for the version                                                       |
+| _required_  | `schema-id`       | ID of the schema for the view version                                    |
+| _required_  | `timestamp-ms`    | Timestamp when the version was created (ms from epoch)                   |
 | _required_  | `summary`         | A string to string map of [summary metadata](#summary) about the version |
-| _required_  | `representations` | A list of [representations](#representations) for the view definition |
+| _required_  | `representations` | A list of [representations](#representations) for the view definition    |
 
 #### Summary
 
@@ -112,7 +113,6 @@ The SQL representation stores the view definition as a SQL SELECT, with metadata
 | _required_  | `type`              | `string`       | Must be `sql` |
 | _required_  | `sql`               | `string`       | A SQL SELECT statement |
 | _required_  | `dialect`           | `string`       | The dialect of the `sql` SELECT statement (e.g., "trino" or "spark") |
-| _optional_  | `schema-id`         | `int`          | ID of the schema produced by the SELECT statement |
 | _optional_  | `default-catalog`   | `string`       | Catalog name to use when a reference in the SELECT does not contain a catalog |
 | _optional_  | `default-namespace` | `list<string>` | Namespace to use when a reference in the SELECT is a single identifier |
 | _optional_  | `field-aliases`     | `list<string>` | Column names optionally specified in the create statement |
@@ -140,7 +140,6 @@ This create statement would produce the following `sql` representation metadata:
 | `type`              | `"sql"` |
 | `sql`               | `"SELECT\n    COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2"` |
 | `dialect`           | `"spark"` |
-| `schema-id`         | `1` |
 | `default-catalog`   | `"prod"` |
 | `default-namespace` | `["default"]` |
 | `field-aliases`     | `["event_count", "event_date"]` |
@@ -201,6 +200,7 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00001-(uuid).metadata.json
   "versions" : [ {
     "version-id" : 1,
     "timestamp-ms" : 1573518431292,
+    "schema-id" : 1,
     "summary" : {
       "operation" : "create",
       "engine-name" : "Spark",
@@ -210,7 +210,6 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00001-(uuid).metadata.json
       "type" : "sql",
       "sql" : "SELECT\n    COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2",
       "dialect" : "spark",
-      "schema-id" : 1,
       "default-catalog" : "prod",
       "default-namespace" : [ "default" ],
       "field-aliases" : ["event_count", "event_date"],
@@ -270,6 +269,7 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00002-(uuid).metadata.json
   "versions" : [ {
     "version-id" : 1,
     "timestamp-ms" : 1573518431292,
+    "schema-id" : 1,
     "summary" : {
       "operation" : "create",
       "engine-name" : "Spark",
@@ -279,7 +279,6 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00002-(uuid).metadata.json
       "type" : "sql",
       "sql" : "SELECT\n    COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2",
       "dialect" : "spark",
-      "schema-id" : 1,
       "default-catalog" : "prod",
       "default-namespace" : [ "default" ],
       "field-aliases" : ["event_count", "event_date"],
@@ -297,7 +296,6 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00002-(uuid).metadata.json
       "type" : "sql",
       "sql" : "SELECT\n    COUNT(1), CAST(event_ts AS DATE)\nFROM prod.default.events\nGROUP BY 2",
       "dialect" : "spark",
-      "schema-id" : 1,
       "default-catalog" : "prod",
       "default-namespace" : [ "default" ],
       "field-aliases" : ["event_count", "event_date"]