You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2021/12/06 00:45:02 UTC

[superset] 01/01: WIP

This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch sqlparse_unit_tests
in repository https://gitbox.apache.org/repos/asf/superset.git

commit a30da261acf93e53d0b8c04d98ad87c8eb976203
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Sun Dec 5 16:37:08 2021 -0800

    WIP
---
 superset/sql_parse.py                      |  13 +-
 tests/integration_tests/sql_parse_tests.py | 515 --------------------
 tests/unit_tests/sql_parse_tests.py        | 745 ++++++++++++++++++++++++++++-
 3 files changed, 753 insertions(+), 520 deletions(-)

diff --git a/superset/sql_parse.py b/superset/sql_parse.py
index eb590e3..9bfd8dd 100644
--- a/superset/sql_parse.py
+++ b/superset/sql_parse.py
@@ -305,6 +305,10 @@ class ParsedQuery:
 
         table_name_preceding_token = False
 
+        # If the table name is a reserved word (eg, "table_name") it won't be returned. We
+        # fix this by ensuring that at least one identifier is return after the FROM.
+        has_processed_identifier = False
+
         for item in token.tokens:
             if item.is_group and (
                 not self._is_identifier(item) or isinstance(item.tokens[0], Parenthesis)
@@ -318,16 +322,23 @@ class ParsedQuery:
                 table_name_preceding_token = True
                 continue
 
-            if item.ttype in Keyword:
+            if item.ttype in Keyword and has_processed_identifier:
                 table_name_preceding_token = False
                 continue
             if table_name_preceding_token:
                 if isinstance(item, Identifier):
                     self._process_tokenlist(item)
+                    has_processed_identifier = True
                 elif isinstance(item, IdentifierList):
                     for token2 in item.get_identifiers():
                         if isinstance(token2, TokenList):
                             self._process_tokenlist(token2)
+                    has_processed_identifier = True
+                elif item.ttype in Keyword:
+                    # convert into an identifier
+                    fixed = Identifier([Token(Name, item.value)])
+                    self._process_tokenlist(fixed)
+                    has_processed_identifier = True
             elif isinstance(item, IdentifierList):
                 if any(not self._is_identifier(token2) for token2 in item.tokens):
                     self._extract_from_token(item)
diff --git a/tests/integration_tests/sql_parse_tests.py b/tests/integration_tests/sql_parse_tests.py
index 01f8d1f..121f193 100644
--- a/tests/integration_tests/sql_parse_tests.py
+++ b/tests/integration_tests/sql_parse_tests.py
@@ -22,521 +22,6 @@ from superset.sql_parse import ParsedQuery, strip_comments_from_sql, Table
 
 
 class TestSupersetSqlParse(unittest.TestCase):
-    def extract_tables(self, query):
-        return ParsedQuery(query).tables
-
-    def test_table(self):
-        self.assertEqual(str(Table("tbname")), "tbname")
-        self.assertEqual(str(Table("tbname", "schemaname")), "schemaname.tbname")
-
-        self.assertEqual(
-            str(Table("tbname", "schemaname", "catalogname")),
-            "catalogname.schemaname.tbname",
-        )
-
-        self.assertEqual(
-            str(Table("tb.name", "schema/name", "catalog\name")),
-            "catalog%0Aame.schema%2Fname.tb%2Ename",
-        )
-
-    def test_simple_select(self):
-        query = "SELECT * FROM tbname"
-        self.assertEqual({Table("tbname")}, self.extract_tables(query))
-
-        query = "SELECT * FROM tbname foo"
-        self.assertEqual({Table("tbname")}, self.extract_tables(query))
-
-        query = "SELECT * FROM tbname AS foo"
-        self.assertEqual({Table("tbname")}, self.extract_tables(query))
-
-        # underscores
-        query = "SELECT * FROM tb_name"
-        self.assertEqual({Table("tb_name")}, self.extract_tables(query))
-
-        # quotes
-        query = 'SELECT * FROM "tbname"'
-        self.assertEqual({Table("tbname")}, self.extract_tables(query))
-
-        # unicode encoding
-        query = 'SELECT * FROM "tb_name" WHERE city = "Lübeck"'
-        self.assertEqual({Table("tb_name")}, self.extract_tables(query))
-
-        # schema
-        self.assertEqual(
-            {Table("tbname", "schemaname")},
-            self.extract_tables("SELECT * FROM schemaname.tbname"),
-        )
-
-        self.assertEqual(
-            {Table("tbname", "schemaname")},
-            self.extract_tables('SELECT * FROM "schemaname"."tbname"'),
-        )
-
-        self.assertEqual(
-            {Table("tbname", "schemaname")},
-            self.extract_tables("SELECT * FROM schemaname.tbname foo"),
-        )
-
-        self.assertEqual(
-            {Table("tbname", "schemaname")},
-            self.extract_tables("SELECT * FROM schemaname.tbname AS foo"),
-        )
-
-        self.assertEqual(
-            {Table("tbname", "schemaname", "catalogname")},
-            self.extract_tables("SELECT * FROM catalogname.schemaname.tbname"),
-        )
-
-        # Ill-defined cluster/schema/table.
-        self.assertEqual(set(), self.extract_tables("SELECT * FROM schemaname."))
-
-        self.assertEqual(
-            set(), self.extract_tables("SELECT * FROM catalogname.schemaname.")
-        )
-
-        self.assertEqual(set(), self.extract_tables("SELECT * FROM catalogname.."))
-
-        self.assertEqual(
-            set(), self.extract_tables("SELECT * FROM catalogname..tbname")
-        )
-
-        # quotes
-        query = "SELECT field1, field2 FROM tb_name"
-        self.assertEqual({Table("tb_name")}, self.extract_tables(query))
-
-        query = "SELECT t1.f1, t2.f2 FROM t1, t2"
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-    def test_select_named_table(self):
-        query = "SELECT a.date, a.field FROM left_table a LIMIT 10"
-        self.assertEqual({Table("left_table")}, self.extract_tables(query))
-
-    def test_reverse_select(self):
-        query = "FROM t1 SELECT field"
-        self.assertEqual({Table("t1")}, self.extract_tables(query))
-
-    def test_subselect(self):
-        query = """
-          SELECT sub.*
-              FROM (
-                    SELECT *
-                      FROM s1.t1
-                     WHERE day_of_week = 'Friday'
-                   ) sub, s2.t2
-          WHERE sub.resolution = 'NONE'
-        """
-        self.assertEqual(
-            {Table("t1", "s1"), Table("t2", "s2")}, self.extract_tables(query)
-        )
-
-        query = """
-          SELECT sub.*
-              FROM (
-                    SELECT *
-                      FROM s1.t1
-                     WHERE day_of_week = 'Friday'
-                   ) sub
-          WHERE sub.resolution = 'NONE'
-        """
-        self.assertEqual({Table("t1", "s1")}, self.extract_tables(query))
-
-        query = """
-            SELECT * FROM t1
-            WHERE s11 > ANY
-             (SELECT COUNT(*) /* no hint */ FROM t2
-               WHERE NOT EXISTS
-                (SELECT * FROM t3
-                  WHERE ROW(5*t2.s1,77)=
-                    (SELECT 50,11*s1 FROM t4)));
-        """
-        self.assertEqual(
-            {Table("t1"), Table("t2"), Table("t3"), Table("t4")},
-            self.extract_tables(query),
-        )
-
-    def test_select_in_expression(self):
-        query = "SELECT f1, (SELECT count(1) FROM t2) FROM t1"
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-        query = "SELECT f1, (SELECT count(1) FROM t2) as f2 FROM t1"
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-    def test_parentheses(self):
-        query = "SELECT f1, (x + y) AS f2 FROM t1"
-        self.assertEqual({Table("t1")}, self.extract_tables(query))
-
-    def test_union(self):
-        query = "SELECT * FROM t1 UNION SELECT * FROM t2"
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-        query = "SELECT * FROM t1 UNION ALL SELECT * FROM t2"
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-        query = "SELECT * FROM t1 INTERSECT ALL SELECT * FROM t2"
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-    def test_select_from_values(self):
-        query = "SELECT * FROM VALUES (13, 42)"
-        self.assertFalse(self.extract_tables(query))
-
-    def test_select_array(self):
-        query = """
-            SELECT ARRAY[1, 2, 3] AS my_array
-            FROM t1 LIMIT 10
-        """
-        self.assertEqual({Table("t1")}, self.extract_tables(query))
-
-    def test_select_if(self):
-        query = """
-            SELECT IF(CARDINALITY(my_array) >= 3, my_array[3], NULL)
-            FROM t1 LIMIT 10
-        """
-        self.assertEqual({Table("t1")}, self.extract_tables(query))
-
-    # SHOW TABLES ((FROM | IN) qualifiedName)? (LIKE pattern=STRING)?
-    def test_show_tables(self):
-        query = "SHOW TABLES FROM s1 like '%order%'"
-        # TODO: figure out what should code do here
-        self.assertEqual({Table("s1")}, self.extract_tables(query))
-        # Expected behavior is below, it is fixed in sqlparse>=3.1
-        # However sqlparse==3.1 breaks some sql formatting.
-        # self.assertEqual(set(), self.extract_tables(query))
-
-    # SHOW COLUMNS (FROM | IN) qualifiedName
-    def test_show_columns(self):
-        query = "SHOW COLUMNS FROM t1"
-        self.assertEqual({Table("t1")}, self.extract_tables(query))
-
-    def test_where_subquery(self):
-        query = """
-          SELECT name
-            FROM t1
-            WHERE regionkey = (SELECT max(regionkey) FROM t2)
-        """
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-        query = """
-          SELECT name
-            FROM t1
-            WHERE regionkey IN (SELECT regionkey FROM t2)
-        """
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-        query = """
-          SELECT name
-            FROM t1
-            WHERE regionkey EXISTS (SELECT regionkey FROM t2)
-        """
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-    # DESCRIBE | DESC qualifiedName
-    def test_describe(self):
-        self.assertEqual({Table("t1")}, self.extract_tables("DESCRIBE t1"))
-
-    # SHOW PARTITIONS FROM qualifiedName (WHERE booleanExpression)?
-    # (ORDER BY sortItem (',' sortItem)*)? (LIMIT limit=(INTEGER_VALUE | ALL))?
-    def test_show_partitions(self):
-        query = """
-            SHOW PARTITIONS FROM orders
-            WHERE ds >= '2013-01-01' ORDER BY ds DESC;
-        """
-        self.assertEqual({Table("orders")}, self.extract_tables(query))
-
-    def test_join(self):
-        query = "SELECT t1.*, t2.* FROM t1 JOIN t2 ON t1.a = t2.a;"
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-        # subquery + join
-        query = """
-            SELECT a.date, b.name FROM
-                left_table a
-                JOIN (
-                  SELECT
-                    CAST((b.year) as VARCHAR) date,
-                    name
-                  FROM right_table
-                ) b
-                ON a.date = b.date
-        """
-        self.assertEqual(
-            {Table("left_table"), Table("right_table")}, self.extract_tables(query)
-        )
-
-        query = """
-            SELECT a.date, b.name FROM
-                left_table a
-                LEFT INNER JOIN (
-                  SELECT
-                    CAST((b.year) as VARCHAR) date,
-                    name
-                  FROM right_table
-                ) b
-                ON a.date = b.date
-        """
-        self.assertEqual(
-            {Table("left_table"), Table("right_table")}, self.extract_tables(query)
-        )
-
-        query = """
-            SELECT a.date, b.name FROM
-                left_table a
-                RIGHT OUTER JOIN (
-                  SELECT
-                    CAST((b.year) as VARCHAR) date,
-                    name
-                  FROM right_table
-                ) b
-                ON a.date = b.date
-        """
-        self.assertEqual(
-            {Table("left_table"), Table("right_table")}, self.extract_tables(query)
-        )
-
-        query = """
-            SELECT a.date, b.name FROM
-                left_table a
-                FULL OUTER JOIN (
-                  SELECT
-                    CAST((b.year) as VARCHAR) date,
-                    name
-                  FROM right_table
-                ) b
-                ON a.date = b.date
-        """
-        self.assertEqual(
-            {Table("left_table"), Table("right_table")}, self.extract_tables(query)
-        )
-
-        # TODO: add SEMI join support, SQL Parse does not handle it.
-        # query = """
-        #     SELECT a.date, b.name FROM
-        #         left_table a
-        #         LEFT SEMI JOIN (
-        #           SELECT
-        #             CAST((b.year) as VARCHAR) date,
-        #             name
-        #           FROM right_table
-        #         ) b
-        #         ON a.date = b.date
-        # """
-        # self.assertEqual({'left_table', 'right_table'},
-        #                   sql_parse.extract_tables(query))
-
-    def test_combinations(self):
-        query = """
-            SELECT * FROM t1
-            WHERE s11 > ANY
-             (SELECT * FROM t1 UNION ALL SELECT * FROM (
-               SELECT t6.*, t3.* FROM t6 JOIN t3 ON t6.a = t3.a) tmp_join
-               WHERE NOT EXISTS
-                (SELECT * FROM t3
-                  WHERE ROW(5*t3.s1,77)=
-                    (SELECT 50,11*s1 FROM t4)));
-        """
-        self.assertEqual(
-            {Table("t1"), Table("t3"), Table("t4"), Table("t6")},
-            self.extract_tables(query),
-        )
-
-        query = """
-        SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM EmployeeS)
-            AS S1) AS S2) AS S3;
-        """
-        self.assertEqual({Table("EmployeeS")}, self.extract_tables(query))
-
-    def test_with(self):
-        query = """
-            WITH
-              x AS (SELECT a FROM t1),
-              y AS (SELECT a AS b FROM t2),
-              z AS (SELECT b AS c FROM t3)
-            SELECT c FROM z;
-        """
-        self.assertEqual(
-            {Table("t1"), Table("t2"), Table("t3")}, self.extract_tables(query)
-        )
-
-        query = """
-            WITH
-              x AS (SELECT a FROM t1),
-              y AS (SELECT a AS b FROM x),
-              z AS (SELECT b AS c FROM y)
-            SELECT c FROM z;
-        """
-        self.assertEqual({Table("t1")}, self.extract_tables(query))
-
-    def test_reusing_aliases(self):
-        query = """
-            with q1 as ( select key from q2 where key = '5'),
-            q2 as ( select key from src where key = '5')
-            select * from (select key from q1) a;
-        """
-        self.assertEqual({Table("src")}, self.extract_tables(query))
-
-    def test_multistatement(self):
-        query = "SELECT * FROM t1; SELECT * FROM t2"
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-        query = "SELECT * FROM t1; SELECT * FROM t2;"
-        self.assertEqual({Table("t1"), Table("t2")}, self.extract_tables(query))
-
-    def test_update_not_select(self):
-        sql = ParsedQuery("UPDATE t1 SET col1 = NULL")
-        self.assertEqual(False, sql.is_select())
-
-    def test_set(self):
-        sql = ParsedQuery(
-            """
-            -- comment
-            SET hivevar:desc='Legislators';
-        """
-        )
-
-        self.assertEqual(True, sql.is_set())
-        self.assertEqual(False, sql.is_select())
-
-        self.assertEqual(True, ParsedQuery("set hivevar:desc='bla'").is_set())
-        self.assertEqual(False, ParsedQuery("SELECT 1").is_set())
-
-    def test_show(self):
-        sql = ParsedQuery(
-            """
-            -- comment
-            SHOW LOCKS test EXTENDED;
-            -- comment
-        """
-        )
-
-        self.assertEqual(True, sql.is_show())
-        self.assertEqual(False, sql.is_select())
-
-        self.assertEqual(True, ParsedQuery("SHOW TABLES").is_show())
-        self.assertEqual(True, ParsedQuery("shOw TABLES").is_show())
-        self.assertEqual(True, ParsedQuery("show TABLES").is_show())
-        self.assertEqual(False, ParsedQuery("SELECT 1").is_show())
-
-    def test_explain(self):
-        sql = ParsedQuery("EXPLAIN SELECT 1")
-
-        self.assertEqual(True, sql.is_explain())
-        self.assertEqual(False, sql.is_select())
-
-    def test_complex_extract_tables(self):
-        query = """SELECT sum(m_examples) AS "sum__m_example"
-            FROM
-              (SELECT COUNT(DISTINCT id_userid) AS m_examples,
-                      some_more_info
-               FROM my_b_table b
-               JOIN my_t_table t ON b.ds=t.ds
-               JOIN my_l_table l ON b.uid=l.uid
-               WHERE b.rid IN
-                   (SELECT other_col
-                    FROM inner_table)
-                 AND l.bla IN ('x', 'y')
-               GROUP BY 2
-               ORDER BY 2 ASC) AS "meh"
-            ORDER BY "sum__m_example" DESC
-            LIMIT 10;"""
-        self.assertEqual(
-            {
-                Table("my_l_table"),
-                Table("my_b_table"),
-                Table("my_t_table"),
-                Table("inner_table"),
-            },
-            self.extract_tables(query),
-        )
-
-    def test_complex_extract_tables2(self):
-        query = """SELECT *
-            FROM table_a AS a, table_b AS b, table_c as c
-            WHERE a.id = b.id and b.id = c.id"""
-        self.assertEqual(
-            {Table("table_a"), Table("table_b"), Table("table_c")},
-            self.extract_tables(query),
-        )
-
-    def test_mixed_from_clause(self):
-        query = """SELECT *
-            FROM table_a AS a, (select * from table_b) AS b, table_c as c
-            WHERE a.id = b.id and b.id = c.id"""
-        self.assertEqual(
-            {Table("table_a"), Table("table_b"), Table("table_c")},
-            self.extract_tables(query),
-        )
-
-    def test_nested_selects(self):
-        query = """
-            select (extractvalue(1,concat(0x7e,(select GROUP_CONCAT(TABLE_NAME)
-            from INFORMATION_SCHEMA.COLUMNS
-            WHERE TABLE_SCHEMA like "%bi%"),0x7e)));
-        """
-        self.assertEqual(
-            {Table("COLUMNS", "INFORMATION_SCHEMA")}, self.extract_tables(query)
-        )
-        query = """
-            select (extractvalue(1,concat(0x7e,(select GROUP_CONCAT(COLUMN_NAME)
-            from INFORMATION_SCHEMA.COLUMNS
-            WHERE TABLE_NAME="bi_achivement_daily"),0x7e)));
-        """
-        self.assertEqual(
-            {Table("COLUMNS", "INFORMATION_SCHEMA")}, self.extract_tables(query)
-        )
-
-    def test_complex_extract_tables3(self):
-        query = """SELECT somecol AS somecol
-            FROM
-              (WITH bla AS
-                 (SELECT col_a
-                  FROM a
-                  WHERE 1=1
-                    AND column_of_choice NOT IN
-                      ( SELECT interesting_col
-                       FROM b ) ),
-                    rb AS
-                 ( SELECT yet_another_column
-                  FROM
-                    ( SELECT a
-                     FROM c
-                     GROUP BY the_other_col ) not_table
-                  LEFT JOIN bla foo ON foo.prop = not_table.bad_col0
-                  WHERE 1=1
-                  GROUP BY not_table.bad_col1 ,
-                           not_table.bad_col2 ,
-                  ORDER BY not_table.bad_col_3 DESC ,
-                           not_table.bad_col4 ,
-                           not_table.bad_col5) SELECT random_col
-               FROM d
-               WHERE 1=1
-               UNION ALL SELECT even_more_cols
-               FROM e
-               WHERE 1=1
-               UNION ALL SELECT lets_go_deeper
-               FROM f
-               WHERE 1=1
-            WHERE 2=2
-            GROUP BY last_col
-            LIMIT 50000;"""
-        self.assertEqual(
-            {Table("a"), Table("b"), Table("c"), Table("d"), Table("e"), Table("f")},
-            self.extract_tables(query),
-        )
-
-    def test_complex_cte_with_prefix(self):
-        query = """
-        WITH CTE__test (SalesPersonID, SalesOrderID, SalesYear)
-        AS (
-            SELECT SalesPersonID, SalesOrderID, YEAR(OrderDate) AS SalesYear
-            FROM SalesOrderHeader
-            WHERE SalesPersonID IS NOT NULL
-        )
-        SELECT SalesPersonID, COUNT(SalesOrderID) AS TotalSales, SalesYear
-        FROM CTE__test
-        GROUP BY SalesYear, SalesPersonID
-        ORDER BY SalesPersonID, SalesYear;
-        """
-        self.assertEqual({Table("SalesOrderHeader")}, self.extract_tables(query))
-
     def test_get_query_with_new_limit_comment(self):
         sql = "SELECT * FROM birth_names -- SOME COMMENT"
         parsed = ParsedQuery(sql)
diff --git a/tests/unit_tests/sql_parse_tests.py b/tests/unit_tests/sql_parse_tests.py
index 927ca1c..71fe9e5 100644
--- a/tests/unit_tests/sql_parse_tests.py
+++ b/tests/unit_tests/sql_parse_tests.py
@@ -17,12 +17,749 @@
 
 # pylint: disable=invalid-name
 
+import unittest
+from typing import Set
+
 import sqlparse
 
-from superset.sql_parse import ParsedQuery
+from superset.sql_parse import ParsedQuery, Table
+
+
+def extract_tables(query: str) -> Set[Table]:
+    """
+    Helper function to extract tables referenced in a query.
+    """
+    return ParsedQuery(query).tables
+
+
+def test_table() -> None:
+    """
+    Test the ``Table`` class and its string conversion.
+
+    Special characters in the table, schema, or catalog name should be escaped correctly.
+    """
+    assert str(Table("table_name")) == "table_name"
+    assert str(Table("table_name", "schema_name")) == "schema_name.table_name"
+    assert (
+        str(Table("table_name", "schema_name", "catalog_name"))
+        == "catalog_name.schema_name.table_name"
+    )
+    assert (
+        str(Table("table.name", "schema/name", "catalog\nname"))
+        == "catalog%0Aname.schema%2Fname.table%2Ename"
+    )
+
+
+def test_extract_tables() -> None:
+    """
+    Test that referenced tables are parsed correctly from the SQL.
+    """
+    assert extract_tables("SELECT * FROM tbname") == {Table("tbname")}
+    assert extract_tables("SELECT * FROM tbname foo") == {Table("tbname")}
+    assert extract_tables("SELECT * FROM tbname AS foo") == {Table("tbname")}
+
+    # underscore
+    assert extract_tables("SELECT * FROM tb_name") == {Table("tb_name")}
+
+    # quotes
+    assert extract_tables('SELECT * FROM "tbname"') == {Table("tbname")}
+
+    # unicode
+    assert extract_tables('SELECT * FROM "tb_name" WHERE city = "Lübeck"') == {
+        Table("tb_name")
+    }
+
+    # columns
+    assert extract_tables("SELECT field1, field2 FROM tb_name") == {Table("tb_name")}
+    assert extract_tables("SELECT t1.f1, t2.f2 FROM t1, t2") == {
+        Table("t1"),
+        Table("t2"),
+    }
+
+    # named table
+    assert extract_tables("SELECT a.date, a.field FROM left_table a LIMIT 10") == {
+        Table("left_table")
+    }
+
+    # reverse select
+    assert extract_tables("FROM t1 SELECT field") == {Table("t1")}
+
+
+def test_extract_tables_subselect() -> None:
+    """
+    Test that tables inside subselects are parsed correctly.
+    """
+    assert (
+        extract_tables(
+            """
+SELECT sub.*
+FROM (
+    SELECT *
+        FROM s1.t1
+        WHERE day_of_week = 'Friday'
+    ) sub, s2.t2
+WHERE sub.resolution = 'NONE'
+"""
+        )
+        == {Table("t1", "s1"), Table("t2", "s2")}
+    )
+
+    assert (
+        extract_tables(
+            """
+SELECT sub.*
+FROM (
+    SELECT *
+    FROM s1.t1
+    WHERE day_of_week = 'Friday'
+) sub
+WHERE sub.resolution = 'NONE'
+"""
+        )
+        == {Table("t1", "s1")}
+    )
+
+    assert (
+        extract_tables(
+            """
+SELECT * FROM t1
+WHERE s11 > ANY (
+    SELECT COUNT(*) /* no hint */ FROM t2
+    WHERE NOT EXISTS (
+        SELECT * FROM t3
+        WHERE ROW(5*t2.s1,77)=(
+            SELECT 50,11*s1 FROM t4
+        )
+    )
+)
+"""
+        )
+        == {Table("t1"), Table("t2"), Table("t3"), Table("t4")}
+    )
+
+
+def test_extract_tables_select_in_expression() -> None:
+    """
+    Test that parser works with ``SELECT``s used as expressions.
+    """
+    assert extract_tables("SELECT f1, (SELECT count(1) FROM t2) FROM t1") == {
+        Table("t1"),
+        Table("t2"),
+    }
+    assert extract_tables("SELECT f1, (SELECT count(1) FROM t2) as f2 FROM t1") == {
+        Table("t1"),
+        Table("t2"),
+    }
+
+
+def test_extract_tables_parenthesis() -> None:
+    """
+    Test that parenthesis are parsed correctly.
+    """
+    assert extract_tables("SELECT f1, (x + y) AS f2 FROM t1") == {Table("t1")}
+
+
+def test_extract_tables_with_schema() -> None:
+    """
+    Test that schemas are parsed correctly.
+    """
+    assert extract_tables("SELECT * FROM schemaname.tbname") == {
+        Table("tbname", "schemaname")
+    }
+    assert extract_tables('SELECT * FROM "schemaname"."tbname"') == {
+        Table("tbname", "schemaname")
+    }
+    assert extract_tables('SELECT * FROM "schemaname"."tbname" foo') == {
+        Table("tbname", "schemaname")
+    }
+    assert extract_tables('SELECT * FROM "schemaname"."tbname" AS foo') == {
+        Table("tbname", "schemaname")
+    }
+
+
+def test_extract_tables_union() -> None:
+    """
+    Test that ``UNION`` queries work as expected.
+    """
+    assert extract_tables("SELECT * FROM t1 UNION SELECT * FROM t2") == {
+        Table("t1"),
+        Table("t2"),
+    }
+    assert extract_tables("SELECT * FROM t1 UNION ALL SELECT * FROM t2") == {
+        Table("t1"),
+        Table("t2"),
+    }
+    assert extract_tables("SELECT * FROM t1 INTERSECT ALL SELECT * FROM t2") == {
+        Table("t1"),
+        Table("t2"),
+    }
+
+
+def test_extract_tables_select_from_values() -> None:
+    """
+    Test that selecting from values returns no tables.
+    """
+    assert extract_tables("SELECT * FROM VALUES (13, 42)") == set()
+
+
+def test_extract_tables_select_array() -> None:
+    """
+    Test that queries selecting arrays work as expected.
+    """
+    assert (
+        extract_tables(
+            """
+SELECT ARRAY[1, 2, 3] AS my_array
+FROM t1 LIMIT 10
+"""
+        )
+        == {Table("t1")}
+    )
+
+
+def test_extract_tables_select_if() -> None:
+    """
+    Test that queries with an ``IF`` work as expected.
+    """
+    assert (
+        extract_tables(
+            """
+SELECT IF(CARDINALITY(my_array) >= 3, my_array[3], NULL)
+FROM t1 LIMIT 10
+"""
+        )
+        == {Table("t1")}
+    )
+
+
+def test_extract_tables_with_catalog() -> None:
+    """
+    Test that catalogs are parsed correctly.
+    """
+    assert extract_tables("SELECT * FROM catalogname.schemaname.tbname") == {
+        Table("tbname", "schemaname", "catalogname")
+    }
+
+
+def test_extract_tables_illdefined() -> None:
+    """
+    Test that ill-defined tables return an empty set.
+    """
+    assert extract_tables("SELECT * FROM schemaname.") == set()
+    assert extract_tables("SELECT * FROM catalogname.schemaname.") == set()
+    assert extract_tables("SELECT * FROM catalogname..") == set()
+    assert extract_tables("SELECT * FROM catalogname..tbname") == set()
+
+
+@unittest.skip("Requires sqlparse>=3.1")
+def test_extract_tables_show_tables_from() -> None:
+    """
+    Test ``SHOW TABLES FROM``.
+
+    This is currently broken in the pinned version of sqlparse, and fixed in
+    ``sqlparse>=3.1``. However, ``sqlparse==3.1`` breaks some sql formatting.
+    """
+    assert extract_tables("SHOW TABLES FROM s1 like '%order%'") == set()
+
+
+def test_extract_tables_show_columns_from() -> None:
+    """
+    Test ``SHOW COLUMNS FROM``.
+    """
+    assert extract_tables("SHOW COLUMNS FROM t1") == {Table("t1")}
+
+
+def test_extract_tables_where_subquery() -> None:
+    """
+    Test that tables in a ``WHERE`` subquery are parsed correctly.
+    """
+    assert (
+        extract_tables(
+            """
+SELECT name
+FROM t1
+WHERE regionkey = (SELECT max(regionkey) FROM t2)
+"""
+        )
+        == {Table("t1"), Table("t2")}
+    )
+
+    assert (
+        extract_tables(
+            """
+SELECT name
+FROM t1
+WHERE regionkey IN (SELECT regionkey FROM t2)
+"""
+        )
+        == {Table("t1"), Table("t2")}
+    )
+
+    assert (
+        extract_tables(
+            """
+SELECT name
+FROM t1
+WHERE regionkey EXISTS (SELECT regionkey FROM t2)
+"""
+        )
+        == {Table("t1"), Table("t2")}
+    )
+
+
+def test_extract_tables_describe() -> None:
+    """
+    Test ``DESCRIBE``.
+    """
+    assert extract_tables("DESCRIBE t1") == {Table("t1")}
+
+
+def test_extract_tables_show_partitions() -> None:
+    """
+    Test ``SHOW PARTITIONS``.
+    """
+    assert (
+        extract_tables(
+            """
+SHOW PARTITIONS FROM orders
+WHERE ds >= '2013-01-01' ORDER BY ds DESC
+"""
+        )
+        == {Table("orders")}
+    )
+
+
+def test_extract_tables_join() -> None:
+    """
+    Test joins.
+    """
+    assert extract_tables("SELECT t1.*, t2.* FROM t1 JOIN t2 ON t1.a = t2.a;") == {
+        Table("t1"),
+        Table("t2"),
+    }
+
+    assert (
+        extract_tables(
+            """
+SELECT a.date, b.name
+FROM left_table a
+JOIN (
+    SELECT
+        CAST((b.year) as VARCHAR) date,
+        name
+    FROM right_table
+) b
+ON a.date = b.date
+"""
+        )
+        == {Table("left_table"), Table("right_table")}
+    )
+
+    assert (
+        extract_tables(
+            """
+SELECT a.date, b.name
+FROM left_table a
+LEFT INNER JOIN (
+    SELECT
+        CAST((b.year) as VARCHAR) date,
+        name
+    FROM right_table
+) b
+ON a.date = b.date
+"""
+        )
+        == {Table("left_table"), Table("right_table")}
+    )
+
+    assert (
+        extract_tables(
+            """
+SELECT a.date, b.name
+FROM left_table a
+RIGHT OUTER JOIN (
+    SELECT
+        CAST((b.year) as VARCHAR) date,
+        name
+    FROM right_table
+) b
+ON a.date = b.date
+"""
+        )
+        == {Table("left_table"), Table("right_table")}
+    )
+
+    assert (
+        extract_tables(
+            """
+SELECT a.date, b.name
+FROM left_table a
+FULL OUTER JOIN (
+    SELECT
+        CAST((b.year) as VARCHAR) date,
+        name
+        FROM right_table
+) b
+ON a.date = b.date
+"""
+        )
+        == {Table("left_table"), Table("right_table")}
+    )
+
+
+def test_extract_tables_semi_join() -> None:
+    """
+    Test ``LEFT SEMI JOIN``.
+    """
+    assert (
+        extract_tables(
+            """
+SELECT a.date, b.name
+FROM left_table a
+LEFT SEMI JOIN (
+    SELECT
+        CAST((b.year) as VARCHAR) date,
+        name
+    FROM right_table
+) b
+ON a.data = b.date
+"""
+        )
+        == {Table("left_table"), Table("right_table")}
+    )
+
+
+def test_extract_tables_combinations() -> None:
+    """
+    Test a complex case with nested queries.
+    """
+    assert (
+        extract_tables(
+            """
+SELECT * FROM t1
+WHERE s11 > ANY (
+    SELECT * FROM t1 UNION ALL SELECT * FROM (
+        SELECT t6.*, t3.* FROM t6 JOIN t3 ON t6.a = t3.a
+    ) tmp_join
+    WHERE NOT EXISTS (
+        SELECT * FROM t3
+        WHERE ROW(5*t3.s1,77)=(
+            SELECT 50,11*s1 FROM t4
+        )
+    )
+)
+"""
+        )
+        == {Table("t1"), Table("t3"), Table("t4"), Table("t6")}
+    )
+
+    assert (
+        extract_tables(
+            """
+SELECT * FROM (
+    SELECT * FROM (
+        SELECT * FROM (
+            SELECT * FROM EmployeeS
+        ) AS S1
+    ) AS S2
+) AS S3
+"""
+        )
+        == {Table("EmployeeS")}
+    )
+
+
+def test_extract_tables_with() -> None:
+    """
+    Test ``WITH``.
+    """
+    assert (
+        extract_tables(
+            """
+WITH
+    x AS (SELECT a FROM t1),
+    y AS (SELECT a AS b FROM t2),
+    z AS (SELECT b AS c FROM t3)
+SELECT c FROM z
+"""
+        )
+        == {Table("t1"), Table("t2"), Table("t3")}
+    )
+
+    assert (
+        extract_tables(
+            """
+WITH
+    x AS (SELECT a FROM t1),
+    y AS (SELECT a AS b FROM x),
+    z AS (SELECT b AS c FROM y)
+SELECT c FROM z
+"""
+        )
+        == {Table("t1")}
+    )
+
+
+def test_extract_tables_reusing_aliases() -> None:
+    """
+    Test that the parser follows aliases.
+    """
+    assert (
+        extract_tables(
+            """
+with q1 as ( select key from q2 where key = '5'),
+q2 as ( select key from src where key = '5')
+select * from (select key from q1) a
+"""
+        )
+        == {Table("src")}
+    )
+
+
+def test_extract_tables_multistatement() -> None:
+    """
+    Test that the parser works with multiple statements.
+    """
+    assert extract_tables("SELECT * FROM t1; SELECT * FROM t2") == {
+        Table("t1"),
+        Table("t2"),
+    }
+    assert extract_tables("SELECT * FROM t1; SELECT * FROM t2;") == {
+        Table("t1"),
+        Table("t2"),
+    }
+
+
+def test_extract_tables_keyword() -> None:
+    """
+    Test that table names that are keywords work as expected.
+
+    If the table name is a ``sqlparse`` reserved keyword (eg, "table_name") the parser
+    needs extra logic to identify it.
+    """
+    assert extract_tables("SELECT * FROM table_name") == {Table("table_name")}
+    assert extract_tables("SELECT * FROM table_name AS foo") == {Table("table_name")}
+
+    # these 3 are considered keywords
+    assert extract_tables("SELECT * FROM catalog_name.schema_name.table_name") == {
+        Table("table_name", "schema_name", "catalog_name")
+    }
+
+
+def test_extract_tables_complex() -> None:
+    """
+    Test a few complex queries.
+    """
+    assert extract_tables(
+        """
+SELECT sum(m_examples) AS "sum__m_example"
+FROM (
+    SELECT
+        COUNT(DISTINCT id_userid) AS m_examples,
+        some_more_info
+    FROM my_b_table b
+    JOIN my_t_table t ON b.ds=t.ds
+    JOIN my_l_table l ON b.uid=l.uid
+    WHERE
+        b.rid IN (
+            SELECT other_col
+            FROM inner_table
+        )
+        AND l.bla IN ('x', 'y')
+    GROUP BY 2
+    ORDER BY 2 ASC
+) AS "meh"
+ORDER BY "sum__m_example" DESC
+LIMIT 10;
+"""
+    ) == {
+        Table("my_l_table"),
+        Table("my_b_table"),
+        Table("my_t_table"),
+        Table("inner_table"),
+    }
+
+    assert (
+        extract_tables(
+            """
+SELECT *
+FROM table_a AS a, table_b AS b, table_c as c
+WHERE a.id = b.id and b.id = c.id
+"""
+        )
+        == {Table("table_a"), Table("table_b"), Table("table_c")}
+    )
+
+    assert (
+        extract_tables(
+            """
+SELECT somecol AS somecol
+FROM (
+    WITH bla AS (
+        SELECT col_a
+        FROM a
+        WHERE
+            1=1
+            AND column_of_choice NOT IN (
+                SELECT interesting_col
+                FROM b
+            )
+    ),
+    rb AS ( 
+        SELECT yet_another_column
+        FROM ( 
+            SELECT a
+            FROM c
+            GROUP BY the_other_col
+        ) not_table
+        LEFT JOIN bla foo
+        ON foo.prop = not_table.bad_col0
+        WHERE 1=1
+        GROUP BY
+            not_table.bad_col1 ,
+            not_table.bad_col2 ,
+        ORDER BY not_table.bad_col_3 DESC ,
+            not_table.bad_col4 ,
+            not_table.bad_col5
+    )
+    SELECT random_col
+    FROM d
+    WHERE 1=1
+    UNION ALL SELECT even_more_cols
+    FROM e
+    WHERE 1=1
+    UNION ALL SELECT lets_go_deeper
+    FROM f
+    WHERE 1=1
+    WHERE 2=2
+    GROUP BY last_col
+    LIMIT 50000
+)
+"""
+        )
+        == {Table("a"), Table("b"), Table("c"), Table("d"), Table("e"), Table("f")}
+    )
+
+
+def test_extract_tables_mixed_from_clause() -> None:
+    """
+    Test that the parser handles a ``FROM`` clause with table and subselect.
+    """
+    assert (
+        extract_tables(
+            """
+SELECT *
+FROM table_a AS a, (select * from table_b) AS b, table_c as c
+WHERE a.id = b.id and b.id = c.id
+"""
+        )
+        == {Table("table_a"), Table("table_b"), Table("table_c")}
+    )
+
+
+def test_extract_tables_nested_select() -> None:
+    """
+    Test that the parser handles selects inside functions.
+    """
+    assert (
+        extract_tables(
+            """
+select (extractvalue(1,concat(0x7e,(select GROUP_CONCAT(TABLE_NAME)
+from INFORMATION_SCHEMA.COLUMNS
+WHERE TABLE_SCHEMA like "%bi%"),0x7e)));
+"""
+        )
+        == {Table("COLUMNS", "INFORMATION_SCHEMA")}
+    )
+
+    assert (
+        extract_tables(
+            """
+select (extractvalue(1,concat(0x7e,(select GROUP_CONCAT(COLUMN_NAME)
+from INFORMATION_SCHEMA.COLUMNS
+WHERE TABLE_NAME="bi_achivement_daily"),0x7e)));
+"""
+        )
+        == {Table("COLUMNS", "INFORMATION_SCHEMA")}
+    )
+
+
+def test_extract_tables_complex_cte_with_prefix() -> None:
+    """
+    Test that the parser handles CTEs with prefixes.
+    """
+    assert (
+        extract_tables(
+            """
+WITH CTE__test (SalesPersonID, SalesOrderID, SalesYear)
+AS (
+    SELECT SalesPersonID, SalesOrderID, YEAR(OrderDate) AS SalesYear
+    FROM SalesOrderHeader
+    WHERE SalesPersonID IS NOT NULL
+)
+SELECT SalesPersonID, COUNT(SalesOrderID) AS TotalSales, SalesYear
+FROM CTE__test
+GROUP BY SalesYear, SalesPersonID
+ORDER BY SalesPersonID, SalesYear;
+"""
+        )
+        == {Table("SalesOrderHeader")}
+    )
+
+
+def test_update() -> None:
+    """
+    Test that ``UPDATE`` is not detected as ``SELECT``.
+    """
+    assert ParsedQuery("UPDATE t1 SET col1 = NULL").is_select() is False
+
+
+def test_set() -> None:
+    """
+    Test that ``SET`` is detected correctly.
+    """
+    query = ParsedQuery(
+        """
+-- comment
+SET hivevar:desc='Legislators';
+"""
+    )
+    assert query.is_set() is True
+    assert query.is_select() is False
+
+    assert ParsedQuery("set hivevar:desc='bla'").is_set() is True
+    assert ParsedQuery("SELECT 1").is_set() is False
+
+
+def test_show() -> None:
+    """
+    Test that ``SHOW`` is detected correctly.
+    """
+    query = ParsedQuery(
+        """
+-- comment
+SHOW LOCKS test EXTENDED;
+-- comment
+"""
+    )
+    assert query.is_show() is True
+    assert query.is_select() is False
+
+    assert ParsedQuery("SHOW TABLES").is_show() is True
+    assert ParsedQuery("shOw TABLES").is_show() is True
+    assert ParsedQuery("show TABLES").is_show() is True
+    assert ParsedQuery("SELECT 1").is_show() is False
+
+
+def test_explain() -> None:
+    """
+    Test that ``EXPLAIN`` is detected correctly.
+    """
+    assert ParsedQuery("EXPLAIN SELECT 1").is_explain() is True
+    assert ParsedQuery("EXPLAIN SELECT 1").is_select() is False
 
 
-def test_cte_with_comments_is_select():
+def test_is_select_cte_with_comments() -> None:
     """
     Some CTES with comments are not correctly identified as SELECTS.
     """
@@ -53,7 +790,7 @@ INNER JOIN blah2 ON blah2.team_id = blah.team_id"""
     assert sql.is_select()
 
 
-def test_cte_is_select():
+def test_cte_is_select() -> None:
     """
     Some CTEs are not correctly identified as SELECTS.
     """
@@ -76,7 +813,7 @@ FROM foo f"""
     assert sql.is_select()
 
 
-def test_unknown_select():
+def test_unknown_select() -> None:
     """
     Test that `is_select` works when sqlparse fails to identify the type.
     """