You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mj...@apache.org on 2016/08/30 15:48:20 UTC
[2/2] incubator-impala git commit: IMPALA-4001: qgen: add proof of concept tests for Query() objects

IMPALA-4001: qgen: add proof of concept tests for Query() objects

This patch adds a simple proof-of-concept test framework, and a few
tests, for the random query generator, specifically the portion of the
random query generator that is responsible for taking a Query object and
doing something with it. The two pieces of functionality I chose for
exhibition are

1. Writing the query into Impala SQL
2. Reporting characteristics in the SELECT clause (used internally)

In the interest of keeping the patch small, I have not added many tests,
nor have I chose to focus on more areas for test. On its own this is
fairly simple. As I add features to this portion of the query generator,
though, it will be more useful to test new functionality and also
regression test the framework.

Change-Id: I2ed1960430ae0af469986e33f88aecb6fa74e999
Reviewed-on: http://gerrit.cloudera.org:8080/4081
Reviewed-by: Michael Brown <mi...@cloudera.com>
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6bbb7fb3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6bbb7fb3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6bbb7fb3

Branch: refs/heads/master
Commit: 6bbb7fb3d24974a18c17c475b48d01c37013c204
Parents: e453086
Author: Michael Brown <mi...@cloudera.com>
Authored: Fri Aug 19 11:10:43 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Aug 30 02:15:35 2016 +0000

----------------------------------------------------------------------
 tests/comparison/tests/README                   |  11 ++
 tests/comparison/tests/fake_query.py            | 115 +++++++++++++++++
 tests/comparison/tests/query_object_testdata.py | 126 +++++++++++++++++++
 tests/comparison/tests/test_query_objects.py    |  83 ++++++++++++
 4 files changed, 335 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6bbb7fb3/tests/comparison/tests/README
----------------------------------------------------------------------
diff --git a/tests/comparison/tests/README b/tests/comparison/tests/README
new file mode 100644
index 0000000..2859bb2
--- /dev/null
+++ b/tests/comparison/tests/README
@@ -0,0 +1,11 @@
+Purpose
+
+This directory contains tests for the Random Query Generator.
+
+How-to
+
+To run the tests we assume you have an impala-python environment already
+set up. Then:
+
+$ cd "${IMPALA_HOME}"/tests/comparison/tests
+$ impala-py.test

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6bbb7fb3/tests/comparison/tests/fake_query.py
----------------------------------------------------------------------
diff --git a/tests/comparison/tests/fake_query.py b/tests/comparison/tests/fake_query.py
new file mode 100644
index 0000000..1241b73
--- /dev/null
+++ b/tests/comparison/tests/fake_query.py
@@ -0,0 +1,115 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This module is used to make instantation of Query() objects a little easier when
+# building them for testing. In typical usage, Query objects and their attributes
+# (clauses, expressions, etc.) are instantiated with little data and built up over time.
+# That works for the query generator, because a lot of logical steps need to happen
+# before the Query building is completed. For our testing purposes, though, we need
+# completed, static Query() objects, and a way to build them up rather easily and
+# expressively.
+#
+# Thus we have lightweight functions that handle initialization and any attribute
+# setting as needed.
+#
+# TODO: As much as possible, it would be better to refactor our data structures to be
+# more testable. But we have a chicken and egg problem in that we have no tests. We have
+# chosen to leave the original datastructures alone, and after we build up some tests to
+# gain confidence, we can modify them to be more testable, and we can remove items from
+# here.
+
+from tests.comparison.common import Column, Table
+from tests.comparison.funcs import AnalyticFirstValue
+from tests.comparison.query import Query, SelectClause, SelectItem
+
+
+def FakeColumn(name, type_):
+  """
+  Return a Column, the creation of which allows the user not to have to specify the
+  first argument, which is the table to which the column belongs.
+
+  Typical use should be when creating a FakeTable, use FakeColumns as arguments.
+  """
+  return Column(None, name, type_)
+
+
+def FakeTable(name, fake_columns):
+  """
+  Return a Table consisting of one or more FakeColumns. Because Columns are added via
+  method, we support nesting here instead.
+  """
+  table = Table(name)
+  if not fake_columns:
+    raise Exception('You must supply at least one FakeColumn argument')
+  for fake_column in fake_columns:
+    table.add_col(fake_column)
+  return table
+
+
+def FakeSelectClause(*args):
+  """
+  Return a SelectClause from value expressions args. This abstracts away from the
+  user the need to explicitly make the value expression items SelectItems.
+  """
+  return SelectClause([SelectItem(_) for _ in args])
+
+
+def FakeQuery(
+    with_clause=None,
+    select_clause=None,
+    from_clause=None,
+    where_clause=None,
+    group_by_clause=None,
+    having_clause=None,
+    union_clause=None,
+    order_by_clause=None,
+    limit_clause=None
+):
+  """
+  Return a Query object constructed by the keyword args above. select_clause and
+  from_clause are required.
+  """
+  query = Query()
+  query.with_clause = with_clause
+  query.select_clause = select_clause
+  query.from_clause = from_clause
+  query.where_clause = where_clause
+  query.group_by_clause = group_by_clause
+  query.having_clause = having_clause
+  query.union_clause = union_clause
+  query.order_by_clause = order_by_clause
+  query.limit_clause = limit_clause
+  if select_clause is None or from_clause is None:
+    raise Exception('FakeQuery must at least contain a select_clause and a from_clause')
+  return query
+
+
+def FakeFirstValue(
+    val_expr,
+    partition_by_clause=None,
+    order_by_clause=None,
+    window_clause=None
+):
+  """
+  Return an AnalyticFirstValue object based on val_expr and optional clauses. The
+  clauses must be *Clause objects (see the funcs and query modules).
+  """
+  first_value = AnalyticFirstValue.create_from_args(val_expr)
+  first_value.partition_by_clause = partition_by_clause
+  first_value.order_by_clause = order_by_clause
+  first_value.window_clause = window_clause
+  return first_value

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6bbb7fb3/tests/comparison/tests/query_object_testdata.py
----------------------------------------------------------------------
diff --git a/tests/comparison/tests/query_object_testdata.py b/tests/comparison/tests/query_object_testdata.py
new file mode 100644
index 0000000..c2b3d3e
--- /dev/null
+++ b/tests/comparison/tests/query_object_testdata.py
@@ -0,0 +1,126 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections import namedtuple
+
+from fake_query import FakeColumn, FakeFirstValue, FakeQuery, FakeSelectClause, FakeTable
+from tests.comparison.db_types import Char, Int
+from tests.comparison.funcs import AggCount
+from tests.comparison.query import FromClause, OrderByClause
+
+
+QueryTest = namedtuple(
+    # A QueryTest object contains a Query and all data to verify about it as other
+    # attributes. This allows a new Query to be added without need to modify tests
+    # themselves. The various tests cherry-pick which test attributes they need to
+    # verify against the Query.
+    #
+    # If you add a new test, add a new attribute, or perhaps reuse one or more
+    # attributes.
+    #
+    # If you add a new test case, add a new item to QUERY_TEST_CASESs array.
+    #
+    # All attributes are required.
+    'QueryTest',
+    [
+        # string to represent readable pytest testid
+        'testid',
+        # Query object, formed via FakeQuery
+        'query',
+        # textual form of FakeQuery
+        'impala_query_string',
+        # hash representing various item counts (see SelectItem property methods)
+        'select_item_counts',
+    ]
+)
+
+
+# FakeTables must be declared for use by queries. Tables may be reused as needed for
+# multiple FakeQueries.
+SIMPLE_TABLE = FakeTable(
+    'fake_table',
+    [
+        FakeColumn('int_col', Int),
+        FakeColumn('char_col', Char),
+    ]
+)
+
+
+# All tests involving queries should be written to use this dataset.
+QUERY_TEST_CASES = [
+    QueryTest(
+        testid='select col from table',
+        query=FakeQuery(
+            select_clause=FakeSelectClause(*SIMPLE_TABLE.cols),
+            from_clause=FromClause(SIMPLE_TABLE),
+        ),
+        impala_query_string=(
+            'SELECT\n'
+            'fake_table.int_col,\n'
+            'TRIM(fake_table.char_col)\n'
+            'FROM fake_table'
+        ),
+        select_item_counts={
+            'items': 2,
+            'basic_items': 2,
+            'agg_items': 0,
+            'analytic_items': 0,
+        },
+    ),
+    QueryTest(
+        testid='select count()',
+        query=FakeQuery(
+            select_clause=FakeSelectClause(
+                AggCount.create_from_args(SIMPLE_TABLE.cols[0])),
+            from_clause=FromClause(SIMPLE_TABLE),
+        ),
+        impala_query_string=(
+            'SELECT\n'
+            'COUNT(fake_table.int_col)\n'
+            'FROM fake_table'
+        ),
+        select_item_counts={
+            'items': 1,
+            'basic_items': 0,
+            'agg_items': 1,
+            'analytic_items': 0,
+        },
+    ),
+    QueryTest(
+        testid='select first_value(col) over (order by col)',
+        query=FakeQuery(
+            select_clause=FakeSelectClause(
+                FakeFirstValue(
+                    SIMPLE_TABLE.cols[0],
+                    order_by_clause=OrderByClause([SIMPLE_TABLE.cols[0]])
+                ),
+            ),
+            from_clause=FromClause(SIMPLE_TABLE),
+        ),
+        impala_query_string=(
+            'SELECT\n'
+            'FIRST_VALUE(fake_table.int_col) OVER (ORDER BY fake_table.int_col ASC)\n'
+            'FROM fake_table'
+        ),
+        select_item_counts={
+            'items': 1,
+            'basic_items': 0,
+            'agg_items': 0,
+            'analytic_items': 1,
+        },
+    ),
+]

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6bbb7fb3/tests/comparison/tests/test_query_objects.py
----------------------------------------------------------------------
diff --git a/tests/comparison/tests/test_query_objects.py b/tests/comparison/tests/test_query_objects.py
new file mode 100644
index 0000000..b0794d2
--- /dev/null
+++ b/tests/comparison/tests/test_query_objects.py
@@ -0,0 +1,83 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+
+from tests.comparison.model_translator import SqlWriter
+
+from query_object_testdata import QUERY_TEST_CASES
+
+
+def _idfn(query_test):
+  return query_test.testid
+
+
+def verify_select_clause_items(query, expected_item_counts):
+  """
+  Verify that a well-formed Query() object's select_clause (SelectClause instance)
+  reports correct item counts. expected_item_counts should be a dictionary with keys
+  matching SelectItem property methods that report item counts and values for the
+  counts.
+  """
+  attrs_to_check = [
+      'items',
+      'basic_items',
+      'analytic_items',
+      'agg_items',
+  ]
+  select_clause = query.select_clause
+  for attr_name in attrs_to_check:
+    select_clause_attr = getattr(select_clause, attr_name)
+    expected_item_count_attr = expected_item_counts.get(attr_name)
+    actual_item_count = len(select_clause_attr)
+    assert len(select_clause_attr) == expected_item_count_attr, (
+        'item count mismatch for item "{item}": expected: {expected}; actual: '
+        '{actual}'.format(item=attr_name, expected=expected_item_count_attr,
+                          actual=actual_item_count))
+
+
+def verify_sql_matches(actual, expected, strip=True):
+  """
+  Assert that the actual and expected SQL queries match. Trailing white space is
+  stripped by default.
+  """
+  if strip:
+    actual = actual.strip()
+    expected = expected.strip()
+  assert actual == expected, 'actual SQL "{actual}" != expected SQL "{expected}"'.format(
+      actual=actual, expected=expected)
+
+
+@pytest.yield_fixture
+def sql_writer():
+  """
+  Return a SqlWriter object that is torn down at the end of each test.
+  """
+  # TODO: Later, we can parametrize on dialect, but for now, this is just PoC.
+  yield SqlWriter.create(dialect='IMPALA')
+
+
+@pytest.mark.parametrize('query_test', QUERY_TEST_CASES, ids=_idfn)
+def test_select_clause_items(query_test):
+  verify_select_clause_items(query_test.query, query_test.select_item_counts)
+
+
+@pytest.mark.parametrize('query_test', QUERY_TEST_CASES, ids=_idfn)
+def test_write_query(sql_writer, query_test):
+  verify_sql_matches(
+      sql_writer.write_query(query_test.query),
+      query_test.impala_query_string)