You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by dz...@apache.org on 2021/02/15 12:57:23 UTC

[drill] 04/07: Add 030-rdbms-metastore.md

This is an automated email from the ASF dual-hosted git repository.

dzamo pushed a commit to branch gh-pages
in repository https://gitbox.apache.org/repos/asf/drill.git

commit bdc00b63923507ec217880ce75ca795f6be4ed29
Author: James Turton <ja...@somecomputer.xyz>
AuthorDate: Mon Feb 15 13:05:35 2021 +0200

    Add 030-rdbms-metastore.md
---
 _data/docs.json                                    | 120 ++++++++++++---
 ...metastore.md => 020-drill-iceberg-metastore.md} |   0
 .../drill-metastore/030-rdbms-metastore.md         | 161 +++++++++++++++++++++
 _sass/_site-main.scss                              |   1 +
 4 files changed, 264 insertions(+), 18 deletions(-)

diff --git a/_data/docs.json b/_data/docs.json
index f764bcf..a7a7cab 100644
--- a/_data/docs.json
+++ b/_data/docs.json
@@ -4708,12 +4708,12 @@
                 }
             ],
             "children": [],
-            "next_title": "Performance Tuning Introduction",
-            "next_url": "/docs/performance-tuning-introduction/",
+            "next_title": "RDBMS Metastore",
+            "next_url": "/docs/rdbms-metastore/",
             "parent": "Drill Metastore",
             "previous_title": "Using Drill Metastore",
             "previous_url": "/docs/using-drill-metastore/",
-            "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md",
+            "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md",
             "title": "Drill Iceberg Metastore",
             "url": "/docs/drill-iceberg-metastore/"
         },
@@ -4775,14 +4775,35 @@
                         }
                     ],
                     "children": [],
-                    "next_title": "Performance Tuning Introduction",
-                    "next_url": "/docs/performance-tuning-introduction/",
+                    "next_title": "RDBMS Metastore",
+                    "next_url": "/docs/rdbms-metastore/",
                     "parent": "Drill Metastore",
                     "previous_title": "Using Drill Metastore",
                     "previous_url": "/docs/using-drill-metastore/",
-                    "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md",
+                    "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md",
                     "title": "Drill Iceberg Metastore",
                     "url": "/docs/drill-iceberg-metastore/"
+                },
+                {
+                    "breadcrumbs": [
+                        {
+                            "title": "Drill Metastore",
+                            "url": "/docs/drill-metastore/"
+                        },
+                        {
+                            "title": "Performance Tuning",
+                            "url": "/docs/performance-tuning/"
+                        }
+                    ],
+                    "children": [],
+                    "next_title": "Performance Tuning Introduction",
+                    "next_url": "/docs/performance-tuning-introduction/",
+                    "parent": "Drill Metastore",
+                    "previous_title": "Drill Iceberg Metastore",
+                    "previous_url": "/docs/drill-iceberg-metastore/",
+                    "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md",
+                    "title": "RDBMS Metastore",
+                    "url": "/docs/rdbms-metastore/"
                 }
             ],
             "next_title": "Using Drill Metastore",
@@ -8419,14 +8440,35 @@
                                 }
                             ],
                             "children": [],
-                            "next_title": "Performance Tuning Introduction",
-                            "next_url": "/docs/performance-tuning-introduction/",
+                            "next_title": "RDBMS Metastore",
+                            "next_url": "/docs/rdbms-metastore/",
                             "parent": "Drill Metastore",
                             "previous_title": "Using Drill Metastore",
                             "previous_url": "/docs/using-drill-metastore/",
-                            "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md",
+                            "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md",
                             "title": "Drill Iceberg Metastore",
                             "url": "/docs/drill-iceberg-metastore/"
+                        },
+                        {
+                            "breadcrumbs": [
+                                {
+                                    "title": "Drill Metastore",
+                                    "url": "/docs/drill-metastore/"
+                                },
+                                {
+                                    "title": "Performance Tuning",
+                                    "url": "/docs/performance-tuning/"
+                                }
+                            ],
+                            "children": [],
+                            "next_title": "Performance Tuning Introduction",
+                            "next_url": "/docs/performance-tuning-introduction/",
+                            "parent": "Drill Metastore",
+                            "previous_title": "Drill Iceberg Metastore",
+                            "previous_url": "/docs/drill-iceberg-metastore/",
+                            "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md",
+                            "title": "RDBMS Metastore",
+                            "url": "/docs/rdbms-metastore/"
                         }
                     ],
                     "next_title": "Using Drill Metastore",
@@ -8449,8 +8491,8 @@
                     "next_title": "Partition Pruning",
                     "next_url": "/docs/partition-pruning/",
                     "parent": "Performance Tuning",
-                    "previous_title": "Drill Iceberg Metastore",
-                    "previous_url": "/docs/drill-iceberg-metastore/",
+                    "previous_title": "RDBMS Metastore",
+                    "previous_url": "/docs/rdbms-metastore/",
                     "relative_path": "_docs/performance-tuning/010-performance-tuning-introduction.md",
                     "title": "Performance Tuning Introduction",
                     "url": "/docs/performance-tuning-introduction/"
@@ -8944,8 +8986,8 @@
             "next_title": "Partition Pruning",
             "next_url": "/docs/partition-pruning/",
             "parent": "Performance Tuning",
-            "previous_title": "Drill Iceberg Metastore",
-            "previous_url": "/docs/drill-iceberg-metastore/",
+            "previous_title": "RDBMS Metastore",
+            "previous_url": "/docs/rdbms-metastore/",
             "relative_path": "_docs/performance-tuning/010-performance-tuning-introduction.md",
             "title": "Performance Tuning Introduction",
             "url": "/docs/performance-tuning-introduction/"
@@ -10827,6 +10869,27 @@
             "title": "Querying the INFORMATION SCHEMA",
             "url": "/docs/querying-the-information-schema/"
         },
+        "RDBMS Metastore": {
+            "breadcrumbs": [
+                {
+                    "title": "Drill Metastore",
+                    "url": "/docs/drill-metastore/"
+                },
+                {
+                    "title": "Performance Tuning",
+                    "url": "/docs/performance-tuning/"
+                }
+            ],
+            "children": [],
+            "next_title": "Performance Tuning Introduction",
+            "next_url": "/docs/performance-tuning-introduction/",
+            "parent": "Drill Metastore",
+            "previous_title": "Drill Iceberg Metastore",
+            "previous_url": "/docs/drill-iceberg-metastore/",
+            "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md",
+            "title": "RDBMS Metastore",
+            "url": "/docs/rdbms-metastore/"
+        },
         "RDBMS Storage Plugin": {
             "breadcrumbs": [
                 {
@@ -20035,14 +20098,35 @@
                                 }
                             ],
                             "children": [],
-                            "next_title": "Performance Tuning Introduction",
-                            "next_url": "/docs/performance-tuning-introduction/",
+                            "next_title": "RDBMS Metastore",
+                            "next_url": "/docs/rdbms-metastore/",
                             "parent": "Drill Metastore",
                             "previous_title": "Using Drill Metastore",
                             "previous_url": "/docs/using-drill-metastore/",
-                            "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md",
+                            "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md",
                             "title": "Drill Iceberg Metastore",
                             "url": "/docs/drill-iceberg-metastore/"
+                        },
+                        {
+                            "breadcrumbs": [
+                                {
+                                    "title": "Drill Metastore",
+                                    "url": "/docs/drill-metastore/"
+                                },
+                                {
+                                    "title": "Performance Tuning",
+                                    "url": "/docs/performance-tuning/"
+                                }
+                            ],
+                            "children": [],
+                            "next_title": "Performance Tuning Introduction",
+                            "next_url": "/docs/performance-tuning-introduction/",
+                            "parent": "Drill Metastore",
+                            "previous_title": "Drill Iceberg Metastore",
+                            "previous_url": "/docs/drill-iceberg-metastore/",
+                            "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md",
+                            "title": "RDBMS Metastore",
+                            "url": "/docs/rdbms-metastore/"
                         }
                     ],
                     "next_title": "Using Drill Metastore",
@@ -20065,8 +20149,8 @@
                     "next_title": "Partition Pruning",
                     "next_url": "/docs/partition-pruning/",
                     "parent": "Performance Tuning",
-                    "previous_title": "Drill Iceberg Metastore",
-                    "previous_url": "/docs/drill-iceberg-metastore/",
+                    "previous_title": "RDBMS Metastore",
+                    "previous_url": "/docs/rdbms-metastore/",
                     "relative_path": "_docs/performance-tuning/010-performance-tuning-introduction.md",
                     "title": "Performance Tuning Introduction",
                     "url": "/docs/performance-tuning-introduction/"
diff --git a/_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md b/_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md
similarity index 100%
rename from _docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md
rename to _docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md
diff --git a/_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md b/_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md
new file mode 100644
index 0000000..e10ce68
--- /dev/null
+++ b/_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md
@@ -0,0 +1,161 @@
+---
+title: "RDBMS Metastore"
+slug: "RDBMS Metastore"
+parent: "Drill Metastore"
+---
+
+The RDBMS Metastore implementation allows you to store Drill Metastore metadata in a configured RDBMS.
+
+## Configuration
+
+Currently, the RDBMS Metastore is not the default implementation.
+To enable the RDBMS Metastore, create the `drill-metastore-override.conf` file 
+in your config directory and specify the RDBMS Metastore class:
+
+```yaml
+drill.metastore: {
+  implementation.class: "org.apache.drill.metastore.rdbms.RdbmsMetastore"
+}
+```
+
+### Connection properties
+
+Use the connection properties to specify how Drill should connect to your Metastore database.
+
+`drill.metastore.rdbms.data_source.driver` - driver class name. Required. 
+Note: the driver class must be included into the Drill classpath. 
+The easiest way to do that is to put the driver jar file into the `$DRILL_HOME/jars/3rdparty` folder
+or, to make upgrades easier, into your `$DRILL_SITE/jars` folder.  Drill includes the driver for SQLite.
+
+`drill.metastore.rdbms.data_source.url` - connection url. Required.
+
+`drill.metastore.rdbms.data_source.username` - database user on whose behalf the connection is
+being made. Optional, if the database does not require a user to connect. 
+
+`drill.metastore.rdbms.data_source.password` - database user's password. 
+Optional, if database does not require user's password to connect.
+
+`drill.metastore.rdbms.data_source.properties` - specifies properties which will be used
+during data source creation. See list of available [Hikari properties](https://github.com/brettwooldridge/HikariCP)
+for more details.
+
+### Default configuration 
+
+Out of the box, the Drill RDBMS Metastore is configured to use the embedded file system based SQLite database.
+It will be created locally in the user's home directory under the `${drill.exec.zk.root}/metastore` location.
+
+The default setup can be used only in Drill embedded mode. SQLite is an embedded database; it is not distributed. 
+SQLite is good for trying out the feature, for testing, for running Drill in embedded mode, 
+and perhaps for a single-node Drill "cluster". It should not be used in a multi-node cluster. 
+Each Drillbit will have its own version of the truth and behavior will be undefined and incorrect.
+
+### Custom configuration
+
+`drill-metastore-override.conf` is used to customize connection details to the Drill Metastore database.
+See `drill-metastore-override-example.conf` for more details.
+
+#### Example of PostgreSQL configuration
+
+```yaml
+drill.metastore: {
+  implementation.class: "org.apache.drill.metastore.rdbms.RdbmsMetastore",
+  rdbms: {
+    data_source: {
+      driver: "org.postgresql.Driver",
+      url: "jdbc:postgresql://localhost:1234/mydb?currentSchema=drill_metastore",
+      username: "user",
+      password: "password"
+    }
+  }
+}
+```
+
+Note: as mentioned above, the PostgreSQL JDBC driver must be present in the Drill classpath.
+
+#### Example of MySQL configuration
+
+```yaml
+drill.metastore: {
+  implementation.class: "org.apache.drill.metastore.rdbms.RdbmsMetastore",
+  rdbms: {
+    data_source: {
+      driver: "com.mysql.cj.jdbc.Driver",
+      url: "jdbc:mysql://localhost:1234/drill_metastore",
+      username: "user",
+      password: "password"
+    }
+  }
+}
+```
+
+Note: as mentioned above, the MySQL JDBC driver must be present in the Drill classpath.
+
+{% include startnote.html %}
+For MySQL connector version 6+, use the <code>com.mysql.cj.jdbc.Driver</code> driver class,
+for older versions use the <code>com.mysql.jdbc.Driver</code>.
+{% include endnote.html %}
+
+## Tables structure
+
+The Drill Metastore stores several types of metadata, called components. Currently, only the `tables` component is implemented.
+The `tables` component provides metadata about Drill tables, including their segments, files, row groups and partitions.
+In Drill, a `tables` component unit is represented by the `TableMetadataUnit` class, which is applicable to any metadata type.
+The `TableMetadataUnit` class holds fields for all five metadata types within the `tables` component. 
+Any fields not applicable to a particular metadata type are simply ignored and remain unset.
+
+In the RDBMS implementation of the Drill Metastore, the tables component includes five tables, one for each metadata type. 
+The five tables are: `TABLES`, `SEGMENTS`, `FILES`, `ROW_GROUPS`, and `PARTITIONS`.
+See `src/main/resources/db/changelog/changes/initial_ddls.yaml` for the schema and indexes of each table.
+
+The Drill Metastore API has the following semantics:
+* most of the time all data about a component is accessed;
+* data is filtered by non-complex fields, like storage plugin, workspace, table name, etc;
+* data is overwritten fully, there is no update by certain fields.
+
+Taking into account the Drill Metastore API semantics, the RDBMS Drill Metastore schema is slightly denormalized.
+Having a normalized structure would lead to unnecessary joins during select and index re-indexing during update.
+
+### Table creation
+
+The RDBMS Metastore uses [Liquibase](https://www.liquibase.org/documentation/core-concepts/index.html)
+to create the needed tables during the RDBMS Metastore initialization. Users should not create any tables manually.
+
+### Database schema
+
+Liquibase uses a yaml configuration file to apply changes to the database schema: `src/main/resources/db/changelog/changelog.yaml`.
+Liquibase converts the yaml specification into the DDL / DML commands required for the configured database.
+See list of supported databases: https://www.liquibase.org/databases.html.
+
+The Drill Metastore tables are created in the database schema indicated in the connection URL.
+This will be the default schema unless you specify a different schema. Drill will not create the schema, however. 
+Best practice is to create a schema within your database for the Drill metastore before initializing the Metastore.
+
+Example:
+
+PostgreSQL: `jdbc:postgresql://localhost:1234/mydb?currentSchema=drill_metastore`
+
+MySQL: `jdbc:mysql://localhost:1234/drill_metastore`
+
+Since Drill will create the required tables, ensure that the database user has the following permissions in the metastore schema:
+* read and write tables;
+* create and modify database objects (tables, indexes, views, etc.).
+
+### Liquibase tables
+
+During Drill RDBMS Metastore initialization, Liquibase will create two internal tracking tables:
+`DATABASECHANGELOG` and `DATABASECHANGELOGLOCK`. They are needed to track schema changes and concurrent updates.
+See https://www.liquibase.org/get_started/how-lb-works.html for more details.
+
+## Query execution
+
+SQL queries issued to RDBMS Metastore tables are generated using [JOOQ](https://www.jooq.org/doc/3.13/manual/getting-started/).
+Drill uses the open-source version of JOOQ to generate the queries sent to the configured Metastore database.
+
+JOOQ generates SQL statements based on the SQL dialect determined by the database connection details.
+List of supported dialects: https://www.jooq.org/javadoc/3.13.x/org.jooq/org/jooq/SQLDialect.html.
+Note: dialects annotated with `@Pro` are not supported, since the open-source version of JOOQ is used.
+
+## Supported databases
+
+The RDBMS Metastore was tested with `SQLite`, `PostgreSQL` and `MySQL`. Other databases should also work
+if there is Liquibase and JOOQ support for them.
diff --git a/_sass/_site-main.scss b/_sass/_site-main.scss
index 8374782..5ebdf39 100644
--- a/_sass/_site-main.scss
+++ b/_sass/_site-main.scss
@@ -147,6 +147,7 @@ a.anchor {
 
 #menu ul li.social-menu-item img {
   width: 22px;
+  padding-bottom: 10px;
 }
 
 #menu ul li ul {