You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by dz...@apache.org on 2021/02/15 12:57:23 UTC
[drill] 04/07: Add 030-rdbms-metastore.md
This is an automated email from the ASF dual-hosted git repository.
dzamo pushed a commit to branch gh-pages
in repository https://gitbox.apache.org/repos/asf/drill.git
commit bdc00b63923507ec217880ce75ca795f6be4ed29
Author: James Turton <ja...@somecomputer.xyz>
AuthorDate: Mon Feb 15 13:05:35 2021 +0200
Add 030-rdbms-metastore.md
---
_data/docs.json | 120 ++++++++++++---
...metastore.md => 020-drill-iceberg-metastore.md} | 0
.../drill-metastore/030-rdbms-metastore.md | 161 +++++++++++++++++++++
_sass/_site-main.scss | 1 +
4 files changed, 264 insertions(+), 18 deletions(-)
diff --git a/_data/docs.json b/_data/docs.json
index f764bcf..a7a7cab 100644
--- a/_data/docs.json
+++ b/_data/docs.json
@@ -4708,12 +4708,12 @@
}
],
"children": [],
- "next_title": "Performance Tuning Introduction",
- "next_url": "/docs/performance-tuning-introduction/",
+ "next_title": "RDBMS Metastore",
+ "next_url": "/docs/rdbms-metastore/",
"parent": "Drill Metastore",
"previous_title": "Using Drill Metastore",
"previous_url": "/docs/using-drill-metastore/",
- "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md",
+ "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md",
"title": "Drill Iceberg Metastore",
"url": "/docs/drill-iceberg-metastore/"
},
@@ -4775,14 +4775,35 @@
}
],
"children": [],
- "next_title": "Performance Tuning Introduction",
- "next_url": "/docs/performance-tuning-introduction/",
+ "next_title": "RDBMS Metastore",
+ "next_url": "/docs/rdbms-metastore/",
"parent": "Drill Metastore",
"previous_title": "Using Drill Metastore",
"previous_url": "/docs/using-drill-metastore/",
- "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md",
+ "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md",
"title": "Drill Iceberg Metastore",
"url": "/docs/drill-iceberg-metastore/"
+ },
+ {
+ "breadcrumbs": [
+ {
+ "title": "Drill Metastore",
+ "url": "/docs/drill-metastore/"
+ },
+ {
+ "title": "Performance Tuning",
+ "url": "/docs/performance-tuning/"
+ }
+ ],
+ "children": [],
+ "next_title": "Performance Tuning Introduction",
+ "next_url": "/docs/performance-tuning-introduction/",
+ "parent": "Drill Metastore",
+ "previous_title": "Drill Iceberg Metastore",
+ "previous_url": "/docs/drill-iceberg-metastore/",
+ "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md",
+ "title": "RDBMS Metastore",
+ "url": "/docs/rdbms-metastore/"
}
],
"next_title": "Using Drill Metastore",
@@ -8419,14 +8440,35 @@
}
],
"children": [],
- "next_title": "Performance Tuning Introduction",
- "next_url": "/docs/performance-tuning-introduction/",
+ "next_title": "RDBMS Metastore",
+ "next_url": "/docs/rdbms-metastore/",
"parent": "Drill Metastore",
"previous_title": "Using Drill Metastore",
"previous_url": "/docs/using-drill-metastore/",
- "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md",
+ "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md",
"title": "Drill Iceberg Metastore",
"url": "/docs/drill-iceberg-metastore/"
+ },
+ {
+ "breadcrumbs": [
+ {
+ "title": "Drill Metastore",
+ "url": "/docs/drill-metastore/"
+ },
+ {
+ "title": "Performance Tuning",
+ "url": "/docs/performance-tuning/"
+ }
+ ],
+ "children": [],
+ "next_title": "Performance Tuning Introduction",
+ "next_url": "/docs/performance-tuning-introduction/",
+ "parent": "Drill Metastore",
+ "previous_title": "Drill Iceberg Metastore",
+ "previous_url": "/docs/drill-iceberg-metastore/",
+ "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md",
+ "title": "RDBMS Metastore",
+ "url": "/docs/rdbms-metastore/"
}
],
"next_title": "Using Drill Metastore",
@@ -8449,8 +8491,8 @@
"next_title": "Partition Pruning",
"next_url": "/docs/partition-pruning/",
"parent": "Performance Tuning",
- "previous_title": "Drill Iceberg Metastore",
- "previous_url": "/docs/drill-iceberg-metastore/",
+ "previous_title": "RDBMS Metastore",
+ "previous_url": "/docs/rdbms-metastore/",
"relative_path": "_docs/performance-tuning/010-performance-tuning-introduction.md",
"title": "Performance Tuning Introduction",
"url": "/docs/performance-tuning-introduction/"
@@ -8944,8 +8986,8 @@
"next_title": "Partition Pruning",
"next_url": "/docs/partition-pruning/",
"parent": "Performance Tuning",
- "previous_title": "Drill Iceberg Metastore",
- "previous_url": "/docs/drill-iceberg-metastore/",
+ "previous_title": "RDBMS Metastore",
+ "previous_url": "/docs/rdbms-metastore/",
"relative_path": "_docs/performance-tuning/010-performance-tuning-introduction.md",
"title": "Performance Tuning Introduction",
"url": "/docs/performance-tuning-introduction/"
@@ -10827,6 +10869,27 @@
"title": "Querying the INFORMATION SCHEMA",
"url": "/docs/querying-the-information-schema/"
},
+ "RDBMS Metastore": {
+ "breadcrumbs": [
+ {
+ "title": "Drill Metastore",
+ "url": "/docs/drill-metastore/"
+ },
+ {
+ "title": "Performance Tuning",
+ "url": "/docs/performance-tuning/"
+ }
+ ],
+ "children": [],
+ "next_title": "Performance Tuning Introduction",
+ "next_url": "/docs/performance-tuning-introduction/",
+ "parent": "Drill Metastore",
+ "previous_title": "Drill Iceberg Metastore",
+ "previous_url": "/docs/drill-iceberg-metastore/",
+ "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md",
+ "title": "RDBMS Metastore",
+ "url": "/docs/rdbms-metastore/"
+ },
"RDBMS Storage Plugin": {
"breadcrumbs": [
{
@@ -20035,14 +20098,35 @@
}
],
"children": [],
- "next_title": "Performance Tuning Introduction",
- "next_url": "/docs/performance-tuning-introduction/",
+ "next_title": "RDBMS Metastore",
+ "next_url": "/docs/rdbms-metastore/",
"parent": "Drill Metastore",
"previous_title": "Using Drill Metastore",
"previous_url": "/docs/using-drill-metastore/",
- "relative_path": "_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md",
+ "relative_path": "_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md",
"title": "Drill Iceberg Metastore",
"url": "/docs/drill-iceberg-metastore/"
+ },
+ {
+ "breadcrumbs": [
+ {
+ "title": "Drill Metastore",
+ "url": "/docs/drill-metastore/"
+ },
+ {
+ "title": "Performance Tuning",
+ "url": "/docs/performance-tuning/"
+ }
+ ],
+ "children": [],
+ "next_title": "Performance Tuning Introduction",
+ "next_url": "/docs/performance-tuning-introduction/",
+ "parent": "Drill Metastore",
+ "previous_title": "Drill Iceberg Metastore",
+ "previous_url": "/docs/drill-iceberg-metastore/",
+ "relative_path": "_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md",
+ "title": "RDBMS Metastore",
+ "url": "/docs/rdbms-metastore/"
}
],
"next_title": "Using Drill Metastore",
@@ -20065,8 +20149,8 @@
"next_title": "Partition Pruning",
"next_url": "/docs/partition-pruning/",
"parent": "Performance Tuning",
- "previous_title": "Drill Iceberg Metastore",
- "previous_url": "/docs/drill-iceberg-metastore/",
+ "previous_title": "RDBMS Metastore",
+ "previous_url": "/docs/rdbms-metastore/",
"relative_path": "_docs/performance-tuning/010-performance-tuning-introduction.md",
"title": "Performance Tuning Introduction",
"url": "/docs/performance-tuning-introduction/"
diff --git a/_docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md b/_docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md
similarity index 100%
rename from _docs/performance-tuning/drill-metastore/030-drill-iceberg-metastore.md
rename to _docs/performance-tuning/drill-metastore/020-drill-iceberg-metastore.md
diff --git a/_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md b/_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md
new file mode 100644
index 0000000..e10ce68
--- /dev/null
+++ b/_docs/performance-tuning/drill-metastore/030-rdbms-metastore.md
@@ -0,0 +1,161 @@
+---
+title: "RDBMS Metastore"
+slug: "RDBMS Metastore"
+parent: "Drill Metastore"
+---
+
+The RDBMS Metastore implementation allows you to store Drill Metastore metadata in a configured RDBMS.
+
+## Configuration
+
+Currently, the RDBMS Metastore is not the default implementation.
+To enable the RDBMS Metastore, create the `drill-metastore-override.conf` file
+in your config directory and specify the RDBMS Metastore class:
+
+```yaml
+drill.metastore: {
+ implementation.class: "org.apache.drill.metastore.rdbms.RdbmsMetastore"
+}
+```
+
+### Connection properties
+
+Use the connection properties to specify how Drill should connect to your Metastore database.
+
+`drill.metastore.rdbms.data_source.driver` - driver class name. Required.
+Note: the driver class must be included into the Drill classpath.
+The easiest way to do that is to put the driver jar file into the `$DRILL_HOME/jars/3rdparty` folder.
+Or, to make upgrades easier, in your `$DRILL_SITE/jars` folder. Drill includes the driver for SQLite.
+
+`drill.metastore.rdbms.data_source.url` - connection url. Required.
+
+`drill.metastore.rdbms.data_source.username` - database user on whose behalf the connection is
+being made. Optional, if database does not require user to connect.
+
+`drill.metastore.rdbms.data_source.password` - database user's password.
+Optional, if database does not require user's password to connect.
+
+`drill.metastore.rdbms.data_source.properties` - specifies properties which will be used
+during data source creation. See list of available [Hikari properties](https://github.com/brettwooldridge/HikariCP)
+for more details.
+
+### Default configuration
+
+Out of the box, the Drill RDBMS Metastore is configured to use the embedded file system based SQLite database.
+It will be created locally in the user's home directory under the `${drill.exec.zk.root}/metastore` location.
+
+The default setup can be used only in Drill embedded mode. SQLite is an embedded database; it is not distributed.
+SQLite is good for trying out the feature, for testing, for running Drill in embedded mode,
+and perhaps for a single-node Drill "cluster". It should not be used in a multi-node cluster.
+Each Drillbit will have its own version of the truth and behavior will be undefined and incorrect.
+
+### Custom configuration
+
+`drill-metastore-override.conf` is used to customize connection details to the Drill Metastore database.
+See `drill-metastore-override-example.conf` for more details.
+
+#### Example of PostgreSQL configuration
+
+```yaml
+drill.metastore: {
+ implementation.class: "org.apache.drill.metastore.rdbms.RdbmsMetastore",
+ rdbms: {
+ data_source: {
+ driver: "org.postgresql.Driver",
+ url: "jdbc:postgresql://localhost:1234/mydb?currentSchema=drill_metastore",
+ username: "user",
+ password: "password"
+ }
+ }
+}
+```
+
+Note: as mentioned above, the PostgreSQL JDBC driver must be present in the Drill classpath.
+
+#### Example of MySQL configuration
+
+```yaml
+drill.metastore: {
+ implementation.class: "org.apache.drill.metastore.rdbms.RdbmsMetastore",
+ rdbms: {
+ data_source: {
+ driver: "com.mysql.cj.jdbc.Driver",
+ url: "jdbc:mysql://localhost:1234/drill_metastore",
+ username: "user",
+ password: "password"
+ }
+ }
+}
+```
+
+Note: as mentioned above, the MySQL JDBC driver must be present in the Drill classpath.
+
+{% include startnote.html %}
+For MySQL connector version 6+, use the <code>com.mysql.cj.jdbc.Driver</code> driver class,
+for older versions use the <code>com.mysql.jdbc.Driver</code>.
+{% include endnote.html %}
+
+## Tables structure
+
+The Drill Metastore stores several types of metadata, called components. Currently, only the `tables` component is implemented.
+The `tables` component provides metadata about Drill tables, including their segments, files, row groups and partitions.
+In Drill, a `tables` component unit is represented by the `TableMetadataUnit` class, which is applicable to any metadata type.
+The `TableMetadataUnit` class holds fields for all five metadata types within the `tables` component.
+Any fields not applicable to a particular metadata type are simply ignored and remain unset.
+
+In the RDBMS implementation of the Drill Metastore, the tables component includes five tables, one for each metadata type.
+The five tables are: `TABLES`, `SEGMENTS`, `FILES`, `ROW_GROUPS`, and `PARTITIONS`.
+See `src/main/resources/db/changelog/changes/initial_ddls.yaml` for the schema and indexes of each table.
+
+The Drill Metastore API has the following semantics:
+* most of the time all data about component is accessed;
+* data is filtered by non-complex fields, like storage plugin, workspace, table name, etc;
+* data is overwritten fully, there is no update by certain fields.
+
+Taking into account the Drill Metastore API semantics, the RDBMS Drill Metastore schema is slightly denormalized.
+Having a normalized structure would lead to unnecessary joins during select and index re-indexing during update.
+
+### Table creation
+
+The RDBMS Metastore uses [Liquibase](https://www.liquibase.org/documentation/core-concepts/index.html)
+to create the needed tables during the RDBMS Metastore initialization. Users should not create any tables manually.
+
+### Database schema
+
+Liquibase uses a yaml configuration file to apply changes to the database schema: `src/main/resources/db/changelog/changelog.yaml`.
+Liquibase converts the yaml specification into the DDL / DML commands required for the configured database.
+See list of supported databases: https://www.liquibase.org/databases.html.
+
+The Drill Metastore tables are created in the database schema indicated in the connection URL.
+This will be the default schema unless you specify a different schema. Drill will not create the schema, however.
+Best practice is to create a schema within your database for the Drill metastore before initializing the Metastore.
+
+Example:
+
+PostgreSQL: `jdbc:postgresql://localhost:1234/mydb?currentSchema=drill_metastore`
+
+MySQL: `jdbc:mysql://localhost:1234/drill_metastore`
+
+Since Drill will create the required tables, ensure that the database user has the following permissions in the metastore schema:
+* read and write tables;
+* create and modify database objects (tables, indexes, views, etc.).
+
+### Liquibase tables
+
+During Drill RDBMS Metastore initialization, Liquibase will create two internal tracking tables:
+`DATABASECHANGELOG` and `DATABASECHANGELOGLOCK`. They are needed to track schema changes and concurrent updates.
+See https://www.liquibase.org/get_started/how-lb-works.html for more details.
+
+## Query execution
+
+SQL queries issued to RDBMS Metastore tables are generated using [JOOQ](https://www.jooq.org/doc/3.13/manual/getting-started/).
+Drill uses the open-source version of JOOQ to generate the queries sent to the configured Metastore database.
+
+JOOQ generates SQL statements based on SQL dialect determined by database connection details.
+List of supported dialects: https://www.jooq.org/javadoc/3.13.x/org.jooq/org/jooq/SQLDialect.html.
+Note: dialects annotated with `@Pro` are not supported, since the open-source version of JOOQ is used.
+
+## Supported databases
+
+The RDBMS Metastore was tested with `SQLite`, `PostgreSQL` and `MySQL`. Other databases should also work
+if there is Liquibase and JOOQ support for them.
diff --git a/_sass/_site-main.scss b/_sass/_site-main.scss
index 8374782..5ebdf39 100644
--- a/_sass/_site-main.scss
+++ b/_sass/_site-main.scss
@@ -147,6 +147,7 @@ a.anchor {
#menu ul li.social-menu-item img {
width: 22px;
+ padding-bottom: 10px;
}
#menu ul li ul {