Posted to commits@impala.apache.org by ta...@apache.org on 2019/02/10 20:24:46 UTC

[impala] branch master updated (022ba2b -> 8b8d935)

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 022ba2b  IMPALA-8105: [DOCS] Document cache_remote_file_handles flag
     new 9d5e2a2  [DOCS] Format fixes in impala_shutdown.xml
     new df83d56  IMPALA-8175: improve tests_minicluster_obj
     new adde66b  IMPALA-7985: Port RemoteShutdown() to KRPC.
     new 4ce689e  IMPALA-8095: Detailed expression cardinality tests
     new f7df8ad  IMPALA-5872: Testcase builder for query planner
     new 9bd3ba7  sys/types.h no longer includes sys/sysmacros.h
     new c3c69ae  IMPALA-5043: diagnostics for topic staleness in AC
     new 81305f2  IMPALA-8163: Add banner showing local catalog mode on web UI when enabled.
     new 8b8d935  IMPALA-5031: `uint8_t & int` type is int

The 9 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/runtime/backend-client.h                    |   8 -
 be/src/runtime/coordinator-backend-state.cc        |  21 +-
 be/src/runtime/coordinator-backend-state.h         |   5 -
 be/src/scheduling/admission-controller.cc          |  91 +++-
 be/src/scheduling/admission-controller.h           |  19 +-
 be/src/service/client-request-state.cc             |  84 ++-
 be/src/service/control-service.cc                  |  25 +-
 be/src/service/control-service.h                   |  35 ++
 be/src/service/impala-http-handler.cc              |  25 +-
 be/src/service/impala-internal-service.cc          |   8 -
 be/src/service/impala-internal-service.h           |   2 -
 be/src/service/impala-server.cc                    |  55 +-
 be/src/service/impala-server.h                     |   6 +-
 be/src/service/query-options.cc                    |   4 +
 be/src/service/query-options.h                     |   4 +-
 be/src/util/backend-gflag-util.cc                  |   2 +
 be/src/util/bit-stream-utils.inline.h              |   5 +-
 be/src/util/default-path-handlers.cc               |   3 +
 be/src/util/disk-info.cc                           |   1 +
 bin/rat_exclude_files.txt                          |   1 +
 common/protobuf/control_service.proto              |  37 ++
 common/thrift/BackendGflags.thrift                 |   2 +
 common/thrift/CatalogService.thrift                |   3 +
 common/thrift/Frontend.thrift                      |  27 +
 common/thrift/ImpalaInternalService.thrift         |  43 +-
 common/thrift/ImpalaService.thrift                 |   5 +
 common/thrift/JniCatalog.thrift                    |   8 +-
 common/thrift/Types.thrift                         |   1 +
 docs/topics/impala_shutdown.xml                    |  21 +-
 fe/src/main/cup/sql-parser.cup                     |  72 ++-
 .../apache/impala/analysis/AnalysisContext.java    |   9 +-
 .../apache/impala/analysis/CopyTestCaseStmt.java   | 210 ++++++++
 .../java/org/apache/impala/analysis/HdfsUri.java   |  25 +-
 .../java/org/apache/impala/analysis/QueryStmt.java |  13 +
 .../org/apache/impala/analysis/SelectListItem.java |   6 +-
 .../org/apache/impala/analysis/SelectStmt.java     |  29 +-
 .../org/apache/impala/analysis/SlotDescriptor.java |   3 +
 .../org/apache/impala/analysis/StmtRewriter.java   |   2 +
 .../java/org/apache/impala/catalog/Catalog.java    |  28 +-
 .../impala/catalog/CatalogServiceCatalog.java      |  48 +-
 .../main/java/org/apache/impala/catalog/FeDb.java  |   9 +
 .../java/org/apache/impala/catalog/FeTable.java    |   9 +
 .../apache/impala/catalog/MetaStoreClientPool.java |   6 +
 .../org/apache/impala/common/FileSystemUtil.java   |  16 +
 .../java/org/apache/impala/common/JniUtil.java     |  19 +
 .../org/apache/impala/planner/HdfsScanNode.java    |  92 ++--
 .../java/org/apache/impala/planner/Planner.java    |   6 +
 .../org/apache/impala/service/BackendConfig.java   |   1 +
 .../apache/impala/service/CatalogOpExecutor.java   | 103 ++++
 .../java/org/apache/impala/service/Frontend.java   |  31 +-
 fe/src/main/jflex/sql-scanner.flex                 |   1 +
 .../impala/analysis/AnalyzeAuthStmtsTest.java      |  26 +-
 .../org/apache/impala/analysis/AnalyzerTest.java   |  19 +
 .../impala/analysis/AuthorizationStmtTest.java     |  93 ++++
 .../impala/analysis/ExprCardinalityTest.java       | 596 +++++++++++++++++++++
 .../org/apache/impala/analysis/ExprNdvTest.java    | 101 +++-
 .../impala/analysis/ExprRewriteRulesTest.java      |  24 +-
 .../org/apache/impala/analysis/ParserTest.java     |  28 +-
 .../apache/impala/common/AbstractFrontendTest.java |  58 ++
 .../impala/common/AnalysisSessionFixture.java      |  89 +++
 ...{FrontendTestBase.java => FrontendFixture.java} | 354 +++++-------
 .../org/apache/impala/common/FrontendTestBase.java | 207 +------
 .../QueryFixture.java}                             | 181 ++-----
 .../org/apache/impala/planner/CardinalityTest.java |  87 +++
 .../apache/impala/planner/TestCaseLoaderTest.java  |  75 +++
 .../impala/testutil/CatalogServiceTestCatalog.java |  33 +-
 .../testutil/EmbeddedMetastoreClientPool.java      |  76 +++
 .../apache/impala/testutil/ImpaladTestCatalog.java |  46 +-
 .../impala/testutil/PlannerTestCaseLoader.java     |  97 ++++
 testdata/NullRows/data.csv                         |  26 +
 testdata/bin/compute-table-stats.sh                |   2 +-
 testdata/bin/create-load-data.sh                   |   3 +
 testdata/bin/create-tpcds-testcase-files.sh        |  47 ++
 .../functional/functional_schema_template.sql      |  24 +
 .../datasets/functional/schema_constraints.csv     |   1 +
 .../workloads/tpcds/queries/raw/tpcds-query1.sql   |  25 +
 .../workloads/tpcds/queries/raw/tpcds-query11.sql  |  81 +++
 .../workloads/tpcds/queries/raw/tpcds-query12.sql  |  34 ++
 .../workloads/tpcds/queries/raw/tpcds-query13.sql  |  52 ++
 .../workloads/tpcds/queries/raw/tpcds-query15.sql  |  20 +
 .../workloads/tpcds/queries/raw/tpcds-query16.sql  |  31 ++
 .../workloads/tpcds/queries/raw/tpcds-query17.sql  |  45 ++
 .../workloads/tpcds/queries/raw/tpcds-query19.sql  |  25 +
 .../workloads/tpcds/queries/raw/tpcds-query2.sql   |  60 +++
 .../workloads/tpcds/queries/raw/tpcds-query20.sql  |  30 ++
 .../workloads/tpcds/queries/raw/tpcds-query21.sql  |  30 ++
 .../workloads/tpcds/queries/raw/tpcds-query25.sql  |  48 ++
 .../workloads/tpcds/queries/raw/tpcds-query26.sql  |  21 +
 .../workloads/tpcds/queries/raw/tpcds-query28.sql  |  53 ++
 .../workloads/tpcds/queries/raw/tpcds-query29.sql  |  47 ++
 .../workloads/tpcds/queries/raw/tpcds-query3.sql   |  21 +
 .../workloads/tpcds/queries/raw/tpcds-query30.sql  |  31 ++
 .../workloads/tpcds/queries/raw/tpcds-query31.sql  |  52 ++
 .../workloads/tpcds/queries/raw/tpcds-query32.sql  |  28 +
 .../workloads/tpcds/queries/raw/tpcds-query33.sql  |  75 +++
 .../workloads/tpcds/queries/raw/tpcds-query34.sql  |  31 ++
 .../workloads/tpcds/queries/raw/tpcds-query37.sql  |  17 +
 .../workloads/tpcds/queries/raw/tpcds-query39.sql  |  54 ++
 .../workloads/tpcds/queries/raw/tpcds-query4.sql   | 116 ++++
 .../workloads/tpcds/queries/raw/tpcds-query40.sql  |  28 +
 .../workloads/tpcds/queries/raw/tpcds-query42.sql  |  22 +
 .../workloads/tpcds/queries/raw/tpcds-query43.sql  |  19 +
 .../workloads/tpcds/queries/raw/tpcds-query46.sql  |  35 ++
 .../workloads/tpcds/queries/raw/tpcds-query47.sql  |  51 ++
 .../workloads/tpcds/queries/raw/tpcds-query48.sql  |  67 +++
 .../workloads/tpcds/queries/raw/tpcds-query49.sql  | 127 +++++
 .../workloads/tpcds/queries/raw/tpcds-query50.sql  |  59 ++
 .../workloads/tpcds/queries/raw/tpcds-query51.sql  |  45 ++
 .../workloads/tpcds/queries/raw/tpcds-query52.sql  |  22 +
 .../workloads/tpcds/queries/raw/tpcds-query53.sql  |  28 +
 .../workloads/tpcds/queries/raw/tpcds-query55.sql  |  14 +
 .../workloads/tpcds/queries/raw/tpcds-query56.sql  |  69 +++
 .../workloads/tpcds/queries/raw/tpcds-query57.sql  |  48 ++
 .../workloads/tpcds/queries/raw/tpcds-query58.sql  |  65 +++
 .../workloads/tpcds/queries/raw/tpcds-query59.sql  |  44 ++
 .../workloads/tpcds/queries/raw/tpcds-query6.sql   |  26 +
 .../workloads/tpcds/queries/raw/tpcds-query60.sql  |  78 +++
 .../workloads/tpcds/queries/raw/tpcds-query61.sql  |  44 ++
 .../workloads/tpcds/queries/raw/tpcds-query62.sql  |  35 ++
 .../workloads/tpcds/queries/raw/tpcds-query63.sql  |  29 +
 .../workloads/tpcds/queries/raw/tpcds-query64.sql  | 120 +++++
 .../workloads/tpcds/queries/raw/tpcds-query65.sql  |  29 +
 .../workloads/tpcds/queries/raw/tpcds-query66.sql  | 220 ++++++++
 .../workloads/tpcds/queries/raw/tpcds-query68.sql  |  42 ++
 .../workloads/tpcds/queries/raw/tpcds-query69.sql  |  47 ++
 .../workloads/tpcds/queries/raw/tpcds-query7.sql   |  21 +
 .../workloads/tpcds/queries/raw/tpcds-query71.sql  |  40 ++
 .../workloads/tpcds/queries/raw/tpcds-query72.sql  |  29 +
 .../workloads/tpcds/queries/raw/tpcds-query73.sql  |  28 +
 .../workloads/tpcds/queries/raw/tpcds-query74.sql  |  61 +++
 .../workloads/tpcds/queries/raw/tpcds-query75.sql  |  70 +++
 .../workloads/tpcds/queries/raw/tpcds-query76.sql  |  24 +
 .../workloads/tpcds/queries/raw/tpcds-query78.sql  |  58 ++
 .../workloads/tpcds/queries/raw/tpcds-query79.sql  |  23 +
 .../workloads/tpcds/queries/raw/tpcds-query81.sql  |  31 ++
 .../workloads/tpcds/queries/raw/tpcds-query82.sql  |  17 +
 .../workloads/tpcds/queries/raw/tpcds-query83.sql  |  67 +++
 .../workloads/tpcds/queries/raw/tpcds-query84.sql  |  21 +
 .../workloads/tpcds/queries/raw/tpcds-query88.sql  |  94 ++++
 .../workloads/tpcds/queries/raw/tpcds-query89.sql  |  28 +
 .../workloads/tpcds/queries/raw/tpcds-query90.sql  |  22 +
 .../workloads/tpcds/queries/raw/tpcds-query91.sql  |  31 ++
 .../workloads/tpcds/queries/raw/tpcds-query92.sql  |  30 ++
 .../workloads/tpcds/queries/raw/tpcds-query94.sql  |  29 +
 .../workloads/tpcds/queries/raw/tpcds-query95.sql  |  32 ++
 .../workloads/tpcds/queries/raw/tpcds-query96.sql  |  16 +
 .../workloads/tpcds/queries/raw/tpcds-query97.sql  |  25 +
 .../workloads/tpcds/queries/raw/tpcds-query98.sql  |  33 ++
 .../workloads/tpcds/queries/raw/tpcds-query99.sql  |  35 ++
 tests/common/impala_test_suite.py                  |  17 +-
 tests/comparison/cluster.py                        |   6 +-
 tests/custom_cluster/test_admission_controller.py  |  79 ++-
 tests/custom_cluster/test_local_catalog.py         | 128 ++---
 tests/custom_cluster/test_restart_services.py      |  34 +-
 tests/infra/test_stress_infra.py                   |  10 +-
 tests/webserver/test_web_pages.py                  |  18 +
 www/admission_controller.tmpl                      |  16 +-
 www/backends.tmpl                                  |   2 +
 www/root.tmpl                                      |   3 +-
 159 files changed, 6270 insertions(+), 930 deletions(-)
 create mode 100644 fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java
 create mode 100644 fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java
 create mode 100644 fe/src/test/java/org/apache/impala/common/AbstractFrontendTest.java
 create mode 100644 fe/src/test/java/org/apache/impala/common/AnalysisSessionFixture.java
 copy fe/src/test/java/org/apache/impala/common/{FrontendTestBase.java => FrontendFixture.java} (59%)
 rename fe/src/test/java/org/apache/impala/{analysis/AnalysisSessionFixture.java => common/QueryFixture.java} (59%)
 create mode 100644 fe/src/test/java/org/apache/impala/planner/TestCaseLoaderTest.java
 create mode 100644 fe/src/test/java/org/apache/impala/testutil/EmbeddedMetastoreClientPool.java
 create mode 100644 fe/src/test/java/org/apache/impala/testutil/PlannerTestCaseLoader.java
 create mode 100644 testdata/NullRows/data.csv
 create mode 100755 testdata/bin/create-tpcds-testcase-files.sh
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query1.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query11.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query12.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query13.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query15.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query16.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query17.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query19.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query2.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query20.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query21.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query25.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query26.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query28.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query29.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query3.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query30.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query31.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query32.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query33.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query34.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query37.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query39.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query4.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query40.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query42.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query43.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query46.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query47.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query48.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query49.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query50.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query51.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query52.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query53.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query55.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query56.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query57.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query58.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query59.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query6.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query60.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query61.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query62.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query63.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query64.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query65.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query66.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query68.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query69.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query7.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query71.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query72.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query73.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query74.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query75.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query76.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query78.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query79.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query81.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query82.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query83.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query84.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query88.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query89.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query90.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query91.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query92.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query94.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query95.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query96.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query97.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query98.sql
 create mode 100644 testdata/workloads/tpcds/queries/raw/tpcds-query99.sql


[impala] 08/09: IMPALA-8163: Add banner showing local catalog mode on web UI when enabled.

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 81305f205849f20fe3d31a142b10aee16d5c46de
Author: Anurag Mantripragada <an...@gmail.com>
AuthorDate: Thu Feb 7 14:18:26 2019 -0800

    IMPALA-8163: Add banner showing local catalog mode on web UI
    when enabled.
    
    Added a banner to the coordinator web UI to make local catalog
    mode visible.
    
    Testing:
    1. Added a test in test_local_catalog that scrapes web pages and
    searches for strings, verifying that the banner appears when
    local catalog mode is enabled.
    2. Refactored parts of test_local_catalog into a
    TestObservability class.
    
    Change-Id: Iaeb2c7b20742a2630d9509da9b629d5f45c55207
    Reviewed-on: http://gerrit.cloudera.org:8080/12413
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
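
As an aside, the banner check in the new test can be reproduced standalone.
A minimal sketch, assuming a coordinator started with --use_local_catalog=true
and its debug webserver on the default port 25000:

    import urllib2  # Python 2, matching the test suite

    # Assumption: coordinator debug webserver on the default port 25000.
    root_page = urllib2.urlopen("http://localhost:25000/").read()

    # The banner string this change adds via www/root.tmpl.
    assert "(Local Catalog Mode)" in root_page, "banner not rendered"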
---
 be/src/util/default-path-handlers.cc       |   3 +
 tests/custom_cluster/test_local_catalog.py | 128 +++++++++++++++--------------
 www/root.tmpl                              |   3 +-
 3 files changed, 72 insertions(+), 62 deletions(-)

diff --git a/be/src/util/default-path-handlers.cc b/be/src/util/default-path-handlers.cc
index 4458b83..6beb148 100644
--- a/be/src/util/default-path-handlers.cc
+++ b/be/src/util/default-path-handlers.cc
@@ -48,6 +48,7 @@ using namespace rapidjson;
 using namespace strings;
 
 DECLARE_bool(enable_process_lifetime_heap_profiling);
+DECLARE_bool(use_local_catalog);
 DEFINE_int64(web_log_bytes, 1024 * 1024,
     "The maximum number of bytes to display on the debug webserver's log page");
 
@@ -285,6 +286,8 @@ void RootHandler(const Webserver::ArgumentMap& args, Document* document) {
   document->AddMember("impala_server_mode", true, document->GetAllocator());
   document->AddMember("is_coordinator", impala_server->IsCoordinator(),
       document->GetAllocator());
+  document->AddMember("use_local_catalog", FLAGS_use_local_catalog,
+      document->GetAllocator());
   document->AddMember("is_executor", impala_server->IsExecutor(),
       document->GetAllocator());
   bool is_quiescing = impala_server->IsShuttingDown();
diff --git a/tests/custom_cluster/test_local_catalog.py b/tests/custom_cluster/test_local_catalog.py
index f9399d7..6173bde 100644
--- a/tests/custom_cluster/test_local_catalog.py
+++ b/tests/custom_cluster/test_local_catalog.py
@@ -29,21 +29,6 @@ RETRY_PROFILE_MSG = 'Retried query planning due to inconsistent metadata'
 
 class TestCompactCatalogUpdates(CustomClusterTestSuite):
 
-  def get_catalog_cache_metrics(self, impalad):
-    """ Returns catalog cache metrics as a dict by scraping the json metrics page on the
-    given impalad"""
-    child_groups =\
-        impalad.service.get_debug_webpage_json('metrics')['metric_group']['child_groups']
-    for group in child_groups:
-      if group['name'] != 'impala-server': continue
-      # Filter catalog cache metrics.
-      for child_group in group['child_groups']:
-        if child_group['name'] != 'catalog': continue
-        metrics_data = [(metric['name'], metric['value'])
-            for metric in child_group['metrics'] if 'catalog.cache' in metric['name']]
-        return dict(metrics_data)
-    assert False, "Catalog cache metrics not found in %s" % child_groups
-
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impalad_args="--use_local_catalog=true",
@@ -167,52 +152,6 @@ class TestCompactCatalogUpdates(CustomClusterTestSuite):
     finally:
       client.close()
 
-  @pytest.mark.execute_serially
-  @CustomClusterTestSuite.with_args(
-      impalad_args="--use_local_catalog=true",
-      catalogd_args="--catalog_topic_mode=minimal")
-  def test_cache_metrics(self, unique_database):
-    """
-    Test that profile output includes impalad local cache metrics. Also verifies that
-    the daemon level metrics are updated between query runs.
-    """
-    try:
-      impalad = self.cluster.impalads[0]
-      client = impalad.service.create_beeswax_client()
-      cache_hit_rate_metric_key = "catalog.cache.hit-rate"
-      cache_miss_rate_metric_key = "catalog.cache.miss-rate"
-      cache_hit_count_metric_key = "catalog.cache.hit-count"
-      cache_request_count_metric_key = "catalog.cache.request-count"
-      cache_request_count_prev_run = 0
-      cache_hit_count_prev_run = 0
-      test_table_name = "%s.test_cache_metrics_test_tbl" % unique_database
-      # A mix of queries of various types.
-      queries_to_test = ["select count(*) from functional.alltypes",
-          "explain select count(*) from functional.alltypes",
-          "create table %s (a int)" % test_table_name,
-          "drop table %s" % test_table_name]
-      for _ in xrange(0, 10):
-        for query in queries_to_test:
-          ret = self.execute_query_expect_success(client, query)
-          assert ret.runtime_profile.count("Frontend:") == 1
-          assert ret.runtime_profile.count("CatalogFetch") > 1
-          cache_metrics = self.get_catalog_cache_metrics(impalad)
-          cache_hit_rate = cache_metrics[cache_hit_rate_metric_key]
-          cache_miss_rate = cache_metrics[cache_miss_rate_metric_key]
-          cache_hit_count = cache_metrics[cache_hit_count_metric_key]
-          cache_request_count = cache_metrics[cache_request_count_metric_key]
-          assert cache_hit_rate > 0.0 and cache_hit_rate < 1.0
-          assert cache_miss_rate > 0.0 and cache_miss_rate < 1.0
-          assert cache_hit_count > cache_hit_count_prev_run,\
-              "%s not updated between two query runs, query - %s"\
-              % (cache_hit_count_metric_key, query)
-          assert cache_request_count > cache_request_count_prev_run,\
-             "%s not updated betweeen two query runs, query - %s"\
-             % (cache_request_count_metric_key, query)
-          cache_hit_count_prev_run = cache_hit_count
-          cache_request_count_prev_run = cache_request_count
-    finally:
-      client.close()
 
 class TestLocalCatalogRetries(CustomClusterTestSuite):
 
@@ -373,3 +312,70 @@ class TestLocalCatalogRetries(CustomClusterTestSuite):
     finally:
       client1.close()
       client2.close()
+
+
+class TestObservability(CustomClusterTestSuite):
+
+  def get_catalog_cache_metrics(self, impalad):
+    """ Returns catalog cache metrics as a dict by scraping the json metrics page on the
+    given impalad"""
+    child_groups =\
+        impalad.service.get_debug_webpage_json('metrics')['metric_group']['child_groups']
+    for group in child_groups:
+      if group['name'] != 'impala-server': continue
+      # Filter catalog cache metrics.
+      for child_group in group['child_groups']:
+        if child_group['name'] != 'catalog': continue
+        metrics_data = [(metric['name'], metric['value'])
+            for metric in child_group['metrics'] if 'catalog.cache' in metric['name']]
+        return dict(metrics_data)
+    assert False, "Catalog cache metrics not found in %s" % child_groups
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(
+      impalad_args="--use_local_catalog=true",
+      catalogd_args="--catalog_topic_mode=minimal")
+  def test_cache_metrics(self, unique_database):
+    """
+    Test that profile output includes impalad local cache metrics. Also verifies that
+    the daemon level metrics are updated between query runs.
+    """
+    try:
+      impalad = self.cluster.impalads[0]
+      # Make sure local catalog mode is enabled and visible on web UI.
+      assert '(Local Catalog Mode)' in impalad.service.read_debug_webpage('/')
+      client = impalad.service.create_beeswax_client()
+      cache_hit_rate_metric_key = "catalog.cache.hit-rate"
+      cache_miss_rate_metric_key = "catalog.cache.miss-rate"
+      cache_hit_count_metric_key = "catalog.cache.hit-count"
+      cache_request_count_metric_key = "catalog.cache.request-count"
+      cache_request_count_prev_run = 0
+      cache_hit_count_prev_run = 0
+      test_table_name = "%s.test_cache_metrics_test_tbl" % unique_database
+      # A mix of queries of various types.
+      queries_to_test = ["select count(*) from functional.alltypes",
+          "explain select count(*) from functional.alltypes",
+          "create table %s (a int)" % test_table_name,
+          "drop table %s" % test_table_name]
+      for _ in xrange(0, 10):
+        for query in queries_to_test:
+          ret = self.execute_query_expect_success(client, query)
+          assert ret.runtime_profile.count("Frontend:") == 1
+          assert ret.runtime_profile.count("CatalogFetch") > 1
+          cache_metrics = self.get_catalog_cache_metrics(impalad)
+          cache_hit_rate = cache_metrics[cache_hit_rate_metric_key]
+          cache_miss_rate = cache_metrics[cache_miss_rate_metric_key]
+          cache_hit_count = cache_metrics[cache_hit_count_metric_key]
+          cache_request_count = cache_metrics[cache_request_count_metric_key]
+          assert cache_hit_rate > 0.0 and cache_hit_rate < 1.0
+          assert cache_miss_rate > 0.0 and cache_miss_rate < 1.0
+          assert cache_hit_count > cache_hit_count_prev_run,\
+              "%s not updated between two query runs, query - %s"\
+              % (cache_hit_count_metric_key, query)
+          assert cache_request_count > cache_request_count_prev_run,\
+             "%s not updated betweeen two query runs, query - %s"\
+             % (cache_request_count_metric_key, query)
+          cache_hit_count_prev_run = cache_hit_count
+          cache_request_count_prev_run = cache_request_count
+    finally:
+      client.close()
diff --git a/www/root.tmpl b/www/root.tmpl
index a33df9b..ef42be6 100644
--- a/www/root.tmpl
+++ b/www/root.tmpl
@@ -27,7 +27,8 @@ under the License.
   {{/is_quiescing}}
 
   {{?impala_server_mode}}
-  <h2>Impala Server Mode: {{?is_coordinator}}Coordinator{{/is_coordinator}}
+  <h2>Impala Server Mode: {{?is_coordinator}}Coordinator{{?use_local_catalog}}
+    (Local Catalog Mode){{/use_local_catalog}}{{/is_coordinator}}
     {{?is_executor}}Executor{{/is_executor}}</h2>
   {{/impala_server_mode}}
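
For context, here is how the new template section renders. A minimal sketch
using standard mustache {{#...}} sections in place of the Impala webserver's
{{?...}} "is set" syntax (pystache and the translated syntax are assumptions,
not part of the patch):

    import pystache  # assumption: standard mustache; Impala's {{?x}} ~ {{#x}}

    TEMPLATE = ("Impala Server Mode: {{#is_coordinator}}Coordinator"
                "{{#use_local_catalog}} (Local Catalog Mode){{/use_local_catalog}}"
                "{{/is_coordinator}} {{#is_executor}}Executor{{/is_executor}}")
    ctx = {"is_coordinator": True, "use_local_catalog": True, "is_executor": True}
    print(pystache.render(TEMPLATE, ctx))
    # -> Impala Server Mode: Coordinator (Local Catalog Mode) Executor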
 


[impala] 03/09: IMPALA-7985: Port RemoteShutdown() to KRPC.

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit adde66b37cea3689dac453f5d4f2aa4863a35979
Author: Andrew Sherman <as...@cloudera.com>
AuthorDate: Wed Jan 9 16:58:13 2019 -0800

    IMPALA-7985: Port RemoteShutdown() to KRPC.
    
    The :shutdown command is used to shut down a remote server. In the
    common case, a user specifies the impalad to shut down by giving a
    host, e.g. :shutdown('host100'). If a user has more than one impalad
    on a remote host, the form :shutdown('<host>:<port>') can be used to
    specify the port by which the impalad can be contacted. Prior to
    IMPALA-7985 this port was the backend port, e.g.
    :shutdown('host100:22000'). With IMPALA-7985 the port to use is the
    KRPC port, e.g. :shutdown('host100:27000').
    
    Shutdown is implemented by making an RPC call to the target impalad.
    This change ports that call to KRPC.
    
    To aid the user in finding the KRPC port, the KRPC address is added to
    the /backends section of the debug web page.
    
    We attempt to detect the case where :shutdown is pointed at a Thrift
    port (like the backend port) and print an informative message.
    
    Documentation of this change will be done in IMPALA-8098.
    Further improvements to DoRpcWithRetry() will be done in IMPALA-8143.
    
    For discussion of why it was chosen to implement this change in an
    incompatible way, see comments in
    https://issues.apache.org/jira/browse/IMPALA-7985.
    
    TESTING
    
    Ran all end-to-end tests.
    Enhanced the test for /backends in test_web_pages.py.
    In test_restart_services.py, added a call to the old backend port to
    the test. Some expected error messages were changed in line with what
    KRPC returns.
    
    Change-Id: I4fd00ee4e638f5e71e27893162fd65501ef9e74e
    Reviewed-on: http://gerrit.cloudera.org:8080/12260
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
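
Since the KRPC address now appears on the /backends debug page, the right
port can be discovered programmatically. A minimal sketch, assuming the
coordinator's debug webserver on the default port 25000 and a "backends"
array in the JSON document:

    import json
    import urllib2  # Python 2, matching the test suite

    # Assumption: coordinator debug webserver on the default port 25000.
    page = urllib2.urlopen("http://localhost:25000/backends?json").read()
    for backend in json.loads(page)["backends"]:
        # 'krpc_address' is the field this change adds to /backends.
        host, krpc_port = backend["krpc_address"].split(":")
        # Post-IMPALA-7985, :shutdown() must target the KRPC port,
        # not the backend (Thrift) port.
        print(":shutdown('%s:%s')" % (host, krpc_port))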
---
 be/src/runtime/backend-client.h               |  8 ---
 be/src/runtime/coordinator-backend-state.cc   | 21 +------
 be/src/runtime/coordinator-backend-state.h    |  5 --
 be/src/service/client-request-state.cc        | 79 ++++++++++++++++++++-------
 be/src/service/control-service.cc             | 25 +++++++--
 be/src/service/control-service.h              | 35 ++++++++++++
 be/src/service/impala-http-handler.cc         |  5 +-
 be/src/service/impala-internal-service.cc     |  8 ---
 be/src/service/impala-internal-service.h      |  2 -
 be/src/service/impala-server.cc               | 55 ++++++++++---------
 be/src/service/impala-server.h                |  6 +-
 common/protobuf/control_service.proto         | 37 +++++++++++++
 common/thrift/ImpalaInternalService.thrift    | 39 -------------
 tests/custom_cluster/test_restart_services.py | 34 +++++++-----
 tests/webserver/test_web_pages.py             | 18 ++++++
 www/backends.tmpl                             |  2 +
 16 files changed, 229 insertions(+), 150 deletions(-)

diff --git a/be/src/runtime/backend-client.h b/be/src/runtime/backend-client.h
index 04139a6..a4ff597 100644
--- a/be/src/runtime/backend-client.h
+++ b/be/src/runtime/backend-client.h
@@ -84,14 +84,6 @@ class ImpalaBackendClient : public ImpalaInternalServiceClient {
     ImpalaInternalServiceClient::recv_PublishFilter(_return);
   }
 
-  void RemoteShutdown(TRemoteShutdownResult& _return, const TRemoteShutdownParams& params,
-      bool* send_done) {
-    DCHECK(!*send_done);
-    ImpalaInternalServiceClient::send_RemoteShutdown(params);
-    *send_done = true;
-    ImpalaInternalServiceClient::recv_RemoteShutdown(_return);
-  }
-
 #pragma clang diagnostic pop
 
  private:
diff --git a/be/src/runtime/coordinator-backend-state.cc b/be/src/runtime/coordinator-backend-state.cc
index 2bc8547..605a758 100644
--- a/be/src/runtime/coordinator-backend-state.cc
+++ b/be/src/runtime/coordinator-backend-state.cc
@@ -417,23 +417,6 @@ void Coordinator::BackendState::UpdateExecStats(
   }
 }
 
-template <typename F>
-Status Coordinator::BackendState::DoRrpcWithRetry(
-    F&& rpc_call, const char* debug_action, const char* error_msg) {
-  Status rpc_status;
-  for (int i = 0; i < 3; i++) {
-    RpcController rpc_controller;
-    rpc_controller.set_timeout(MonoDelta::FromSeconds(10));
-    // Check for injected failures.
-    rpc_status = DebugAction(query_ctx().client_request.query_options, debug_action);
-    if (!rpc_status.ok()) continue;
-
-    rpc_status = FromKuduStatus(rpc_call(&rpc_controller), error_msg);
-    if (rpc_status.ok()) break;
-  }
-  return rpc_status;
-}
-
 bool Coordinator::BackendState::Cancel() {
   unique_lock<mutex> l(lock_);
 
@@ -472,8 +455,8 @@ bool Coordinator::BackendState::Cancel() {
     return proxy->CancelQueryFInstances(request, &response, rpc_controller);
   };
 
-  Status rpc_status = DoRrpcWithRetry(
-      cancel_rpc, "COORD_CANCEL_QUERY_FINSTANCES_RPC", "Cancel() RPC failed");
+  Status rpc_status = ControlService::DoRpcWithRetry(cancel_rpc, query_ctx(),
+      "COORD_CANCEL_QUERY_FINSTANCES_RPC", "Cancel() RPC failed", 3, 10);
 
   if (!rpc_status.ok()) {
     status_.MergeStatus(rpc_status);
diff --git a/be/src/runtime/coordinator-backend-state.h b/be/src/runtime/coordinator-backend-state.h
index 122da42..1cc67c1 100644
--- a/be/src/runtime/coordinator-backend-state.h
+++ b/be/src/runtime/coordinator-backend-state.h
@@ -307,11 +307,6 @@ class Coordinator::BackendState {
 
   /// Same as ComputeResourceUtilization() but caller must hold lock.
   ResourceUtilization ComputeResourceUtilizationLocked();
-
-  /// Retry the Rpc 'rpc_call' up to 3 times.
-  /// Pass 'debug_action' to DebugAction() to potentially inject errors.
-  template <typename F>
-  Status DoRrpcWithRetry(F&& rpc_call, const char* debug_action, const char* error_msg);
 };
 
 /// Per fragment execution statistics.
diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc
index bde4741..d8ce417 100644
--- a/be/src/service/client-request-state.cc
+++ b/be/src/service/client-request-state.cc
@@ -21,12 +21,14 @@
 #include <limits>
 #include <gutil/strings/substitute.h>
 
+#include "exec/kudu-util.h"
+#include "kudu/rpc/rpc_controller.h"
 #include "runtime/backend-client.h"
 #include "runtime/coordinator.h"
+#include "runtime/exec-env.h"
 #include "runtime/mem-tracker.h"
 #include "runtime/row-batch.h"
 #include "runtime/runtime-state.h"
-#include "runtime/exec-env.h"
 #include "scheduling/admission-controller.h"
 #include "scheduling/scheduler.h"
 #include "service/frontend.h"
@@ -40,19 +42,23 @@
 
 #include "gen-cpp/CatalogService.h"
 #include "gen-cpp/CatalogService_types.h"
+#include "gen-cpp/control_service.pb.h"
+#include "gen-cpp/control_service.proxy.h"
 
 #include <thrift/Thrift.h>
 
 #include "common/names.h"
+#include "control-service.h"
 
 using boost::algorithm::iequals;
 using boost::algorithm::join;
+using kudu::rpc::RpcController;
 using namespace apache::hive::service::cli::thrift;
 using namespace apache::thrift;
 using namespace beeswax;
 using namespace strings;
 
-DECLARE_int32(be_port);
+DECLARE_int32(krpc_port);
 DECLARE_int32(catalog_service_port);
 DECLARE_string(catalog_service_host);
 DECLARE_int64(max_result_cache_size);
@@ -630,39 +636,72 @@ Status ClientRequestState::ExecDdlRequest() {
 
 Status ClientRequestState::ExecShutdownRequest() {
   const TShutdownParams& request = exec_request_.admin_request.shutdown_params;
-  int port = request.__isset.backend && request.backend.port != 0 ? request.backend.port :
-                                                                    FLAGS_be_port;
+  bool backend_port_specified = request.__isset.backend && request.backend.port != 0;
+  int port = backend_port_specified ? request.backend.port : FLAGS_krpc_port;
   // Use the local shutdown code path if the host is unspecified or if it exactly matches
   // the configured host/port. This avoids the possibility of RPC errors preventing
   // shutdown.
   if (!request.__isset.backend
-      || (request.backend.hostname == FLAGS_hostname && port == FLAGS_be_port)) {
-    TShutdownStatus shutdown_status;
+      || (request.backend.hostname == FLAGS_hostname && port == FLAGS_krpc_port)) {
+    ShutdownStatusPB shutdown_status;
     int64_t deadline_s = request.__isset.deadline_s ? request.deadline_s : -1;
     RETURN_IF_ERROR(parent_server_->StartShutdown(deadline_s, &shutdown_status));
     SetResultSet({ImpalaServer::ShutdownStatusToString(shutdown_status)});
     return Status::OK();
   }
-  TNetworkAddress addr = MakeNetworkAddress(request.backend.hostname, port);
 
-  TRemoteShutdownParams params;
-  if (request.__isset.deadline_s) params.__set_deadline_s(request.deadline_s);
-  TRemoteShutdownResult resp;
+  // KRPC relies on resolved IP address, so convert hostname.
+  IpAddr ip_address;
+  Status ip_status = HostnameToIpAddr(request.backend.hostname, &ip_address);
+  if (!ip_status.ok()) {
+    VLOG(1) << "Could not convert hostname " << request.backend.hostname
+            << " to ip address, error: " << ip_status.GetDetail();
+    return ip_status;
+  }
+  TNetworkAddress addr = MakeNetworkAddress(ip_address, port);
+
+  std::unique_ptr<ControlServiceProxy> proxy;
+  Status get_proxy_status = ControlService::GetProxy(addr, addr.hostname, &proxy);
+  if (!get_proxy_status.ok()) {
+    return Status(
+        Substitute("Could not get Proxy to ControlService at $0 with error: $1.",
+            TNetworkAddressToString(addr), get_proxy_status.msg().msg()));
+  }
+
+  RemoteShutdownParamsPB params;
+  if (request.__isset.deadline_s) params.set_deadline_s(request.deadline_s);
+  RemoteShutdownResultPB resp;
   VLOG_QUERY << "Sending Shutdown RPC to " << TNetworkAddressToString(addr);
-  ImpalaBackendConnection::RpcStatus rpc_status = ImpalaBackendConnection::DoRpcWithRetry(
-      ExecEnv::GetInstance()->impalad_client_cache(), addr,
-      &ImpalaBackendClient::RemoteShutdown, params,
-      [this]() { return DebugAction(query_options(), "CRS_SHUTDOWN_RPC"); }, &resp);
-  if (!rpc_status.status.ok()) {
+
+  auto shutdown_rpc = [&](RpcController* rpc_controller) -> kudu::Status {
+    return proxy->RemoteShutdown(params, &resp, rpc_controller);
+  };
+
+  Status rpc_status = ControlService::DoRpcWithRetry(
+      shutdown_rpc, query_ctx_, "CRS_SHUTDOWN_RPC", "RemoteShutdown() RPC failed", 3, 10);
+
+  if (!rpc_status.ok()) {
+    const string& msg = rpc_status.msg().msg();
     VLOG_QUERY << "RemoteShutdown query_id= " << PrintId(query_id())
                << " failed to send RPC to " << TNetworkAddressToString(addr) << " :"
-               << rpc_status.status.msg().msg();
-    return rpc_status.status;
+               << msg;
+    string err_string = Substitute(
+        "Rpc to $0 failed with error '$1'", TNetworkAddressToString(addr), msg);
+    // Attempt to detect if the failure is because of not using a KRPC port.
+    if (backend_port_specified
+        && msg.find("RemoteShutdown() RPC failed: Timed out: connection negotiation to")
+            != string::npos) {
+      // Prior to IMPALA-7985 :shutdown() used the backend port.
+      err_string.append(" This may be because the port specified is wrong. You may have"
+                        " specified the backend (thrift) port which :shutdown() can no"
+                        " longer use. Please make sure the correct KRPC port is being"
+                        " used, or don't specify any port in the :shutdown() command.");
+    }
+    return Status(err_string);
   }
-
-  Status shutdown_status(resp.status);
+  Status shutdown_status(resp.status());
   RETURN_IF_ERROR(shutdown_status);
-  SetResultSet({ImpalaServer::ShutdownStatusToString(resp.shutdown_status)});
+  SetResultSet({ImpalaServer::ShutdownStatusToString(resp.shutdown_status())});
   return Status::OK();
 }
 
diff --git a/be/src/service/control-service.cc b/be/src/service/control-service.cc
index 991d8f3..d48b873 100644
--- a/be/src/service/control-service.cc
+++ b/be/src/service/control-service.cc
@@ -20,6 +20,7 @@
 #include "common/constant-strings.h"
 #include "exec/kudu-util.h"
 #include "kudu/rpc/rpc_context.h"
+#include "kudu/rpc/rpc_controller.h"
 #include "rpc/rpc-mgr.h"
 #include "rpc/rpc-mgr.inline.h"
 #include "runtime/coordinator.h"
@@ -94,7 +95,7 @@ bool ControlService::Authorize(const google::protobuf::Message* req,
 }
 
 Status ControlService::GetProfile(const ReportExecStatusRequestPB& request,
-    const ClientRequestState& request_state, kudu::rpc::RpcContext* rpc_context,
+    const ClientRequestState& request_state, RpcContext* rpc_context,
     TRuntimeProfileForest* thrift_profiles) {
   // Debug action to simulate deserialization failure.
   RETURN_IF_ERROR(DebugAction(request_state.query_options(),
@@ -110,7 +111,7 @@ Status ControlService::GetProfile(const ReportExecStatusRequestPB& request,
 }
 
 void ControlService::ReportExecStatus(const ReportExecStatusRequestPB* request,
-    ReportExecStatusResponsePB* response, kudu::rpc::RpcContext* rpc_context) {
+    ReportExecStatusResponsePB* response, RpcContext* rpc_context) {
   const TUniqueId query_id = ProtoToQueryId(request->query_id());
   shared_ptr<ClientRequestState> request_state =
       ExecEnv::GetInstance()->impala_server()->GetClientRequestState(query_id);
@@ -152,9 +153,9 @@ void ControlService::ReportExecStatus(const ReportExecStatusRequestPB* request,
   RespondAndReleaseRpc(resp_status, response, rpc_context);
 }
 
-template<typename ResponsePBType>
-void ControlService::RespondAndReleaseRpc(const Status& status, ResponsePBType* response,
-    kudu::rpc::RpcContext* rpc_context) {
+template <typename ResponsePBType>
+void ControlService::RespondAndReleaseRpc(
+    const Status& status, ResponsePBType* response, RpcContext* rpc_context) {
   status.ToProto(response->mutable_status());
   // Release the memory against the control service's memory tracker.
   mem_tracker_->Release(rpc_context->GetTransferSize());
@@ -162,10 +163,11 @@ void ControlService::RespondAndReleaseRpc(const Status& status, ResponsePBType*
 }
 
 void ControlService::CancelQueryFInstances(const CancelQueryFInstancesRequestPB* request,
-    CancelQueryFInstancesResponsePB* response, ::kudu::rpc::RpcContext* rpc_context) {
+    CancelQueryFInstancesResponsePB* response, RpcContext* rpc_context) {
   DCHECK(request->has_query_id());
   const TUniqueId& query_id = ProtoToQueryId(request->query_id());
   VLOG_QUERY << "CancelQueryFInstances(): query_id=" << PrintId(query_id);
+  // TODO(IMPALA-8143) Use DebugAction for fault injection.
   FAULT_INJECTION_RPC_DELAY(RPC_CANCELQUERYFINSTANCES);
   QueryState::ScopedRef qs(query_id);
   if (qs.get() == nullptr) {
@@ -177,4 +179,15 @@ void ControlService::CancelQueryFInstances(const CancelQueryFInstancesRequestPB*
   qs->Cancel();
   RespondAndReleaseRpc(Status::OK(), response, rpc_context);
 }
+
+void ControlService::RemoteShutdown(const RemoteShutdownParamsPB* req,
+    RemoteShutdownResultPB* response, RpcContext* rpc_context) {
+  // TODO(IMPALA-8143) Use DebugAction for fault injection.
+  FAULT_INJECTION_RPC_DELAY(RPC_REMOTESHUTDOWN);
+  Status status = ExecEnv::GetInstance()->impala_server()->StartShutdown(
+      req->has_deadline_s() ? req->deadline_s() : -1,
+      response->mutable_shutdown_status());
+
+  RespondAndReleaseRpc(status, response, rpc_context);
+}
 }
diff --git a/be/src/service/control-service.h b/be/src/service/control-service.h
index 5dc73dd..6a0267f 100644
--- a/be/src/service/control-service.h
+++ b/be/src/service/control-service.h
@@ -20,8 +20,16 @@
 
 #include "gen-cpp/control_service.service.h"
 
+#include "kudu/rpc/rpc_context.h"
+#include "kudu/rpc/rpc_controller.h"
+#include "util/debug-util.h"
+
 #include "common/status.h"
 
+using kudu::MonoDelta;
+using kudu::rpc::RpcContext;
+using kudu::rpc::RpcController;
+
 namespace kudu {
 namespace rpc {
 class RpcContext;
@@ -59,11 +67,38 @@ class ControlService : public ControlServiceIf {
   virtual void CancelQueryFInstances(const CancelQueryFInstancesRequestPB* req,
       CancelQueryFInstancesResponsePB* resp, ::kudu::rpc::RpcContext* context) override;
 
+  /// Initiate shutdown.
+  virtual void RemoteShutdown(const RemoteShutdownParamsPB* req,
+      RemoteShutdownResultPB* response, ::kudu::rpc::RpcContext* context) override;
+
   /// Gets a ControlService proxy to a server with 'address' and 'hostname'.
   /// The newly created proxy is returned in 'proxy'. Returns error status on failure.
   static Status GetProxy(const TNetworkAddress& address, const std::string& hostname,
       std::unique_ptr<ControlServiceProxy>* proxy);
 
+  /// Retry the Rpc 'rpc_call' up to 'times_to_try' times.
+  /// Each Rpc has a timeout of 'timeout_s' seconds.
+  /// There is no sleeping between retries.
+  /// Pass 'debug_action' to DebugAction() to potentially inject errors.
+  template <typename F>
+  static Status DoRpcWithRetry(F&& rpc_call, const TQueryCtx& query_ctx,
+      const char* debug_action, const char* error_msg, int times_to_try, int timeout_s) {
+    DCHECK_GT(times_to_try, 0);
+    Status rpc_status;
+    for (int i = 0; i < times_to_try; i++) {
+      RpcController rpc_controller;
+      rpc_controller.set_timeout(MonoDelta::FromSeconds(timeout_s));
+      // Check for injected failures.
+      rpc_status = DebugAction(query_ctx.client_request.query_options, debug_action);
+      if (!rpc_status.ok()) continue;
+
+      rpc_status = FromKuduStatus(rpc_call(&rpc_controller), error_msg);
+      if (rpc_status.ok()) break;
+      // TODO(IMPALA-8143) Add a sleep if RpcMgr::IsServerTooBusy().
+    }
+    return rpc_status;
+  }
+
  private:
   /// Tracks the memory usage of payload in the service queue.
   std::unique_ptr<MemTracker> mem_tracker_;
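
The retry helper above makes a fixed number of attempts, each with its own
timeout, and does not sleep between attempts (a sleep when the server is too
busy is deferred to IMPALA-8143). Reduced to a language-neutral Python sketch,
with illustrative names and the debug-action hook omitted:

    def do_rpc_with_retry(rpc_call, times_to_try, timeout_s):
        # Each attempt gets a fresh timeout_s deadline; return the first
        # OK status, otherwise the status of the last attempt.
        status = None
        for _ in range(times_to_try):
            status = rpc_call(timeout_s)
            if status.ok():
                break
        return status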
diff --git a/be/src/service/impala-http-handler.cc b/be/src/service/impala-http-handler.cc
index 424e5cf..4a55e66 100644
--- a/be/src/service/impala-http-handler.cc
+++ b/be/src/service/impala-http-handler.cc
@@ -33,9 +33,9 @@
 #include "runtime/timestamp-value.h"
 #include "runtime/timestamp-value.inline.h"
 #include "scheduling/admission-controller.h"
-#include "service/impala-server.h"
 #include "service/client-request-state.h"
 #include "service/frontend.h"
+#include "service/impala-server.h"
 #include "thrift/protocol/TDebugProtocol.h"
 #include "util/coding-util.h"
 #include "util/logging-support.h"
@@ -854,7 +854,10 @@ void ImpalaHttpHandler::BackendsHandler(const Webserver::ArgumentMap& args,
     Value backend_obj(kObjectType);
     string address = TNetworkAddressToString(backend.address);
     Value str(address.c_str(), document->GetAllocator());
+    Value krpc_address(
+        TNetworkAddressToString(backend.krpc_address).c_str(), document->GetAllocator());
     backend_obj.AddMember("address", str, document->GetAllocator());
+    backend_obj.AddMember("krpc_address", krpc_address, document->GetAllocator());
     backend_obj.AddMember("is_coordinator", backend.is_coordinator,
         document->GetAllocator());
     backend_obj.AddMember("is_executor", backend.is_executor, document->GetAllocator());
diff --git a/be/src/service/impala-internal-service.cc b/be/src/service/impala-internal-service.cc
index c4c2139..d4ed14d 100644
--- a/be/src/service/impala-internal-service.cc
+++ b/be/src/service/impala-internal-service.cc
@@ -86,11 +86,3 @@ void ImpalaInternalService::PublishFilter(TPublishFilterResult& return_val,
   if (qs.get() == nullptr) return;
   qs->PublishFilter(params);
 }
-
-void ImpalaInternalService::RemoteShutdown(TRemoteShutdownResult& return_val,
-    const TRemoteShutdownParams& params) {
-  FAULT_INJECTION_RPC_DELAY(RPC_REMOTESHUTDOWN);
-  Status status = impala_server_->StartShutdown(
-      params.__isset.deadline_s ? params.deadline_s : -1, &return_val.shutdown_status);
-  status.ToThrift(&return_val.status);
-}
diff --git a/be/src/service/impala-internal-service.h b/be/src/service/impala-internal-service.h
index 28000c7..9f0ea60 100644
--- a/be/src/service/impala-internal-service.h
+++ b/be/src/service/impala-internal-service.h
@@ -37,8 +37,6 @@ class ImpalaInternalService : public ImpalaInternalServiceIf {
       const TUpdateFilterParams& params);
   virtual void PublishFilter(TPublishFilterResult& return_val,
       const TPublishFilterParams& params);
-  virtual void RemoteShutdown(TRemoteShutdownResult& return_val,
-      const TRemoteShutdownParams& params);
 
  private:
   ImpalaServer* impala_server_;
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index cf16563..c2705d6 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -2428,38 +2428,39 @@ Status ImpalaServer::CheckNotShuttingDown() const {
       TErrorCode::SERVER_SHUTTING_DOWN, ShutdownStatusToString(GetShutdownStatus())));
 }
 
-TShutdownStatus ImpalaServer::GetShutdownStatus() const {
-  TShutdownStatus result;
+ShutdownStatusPB ImpalaServer::GetShutdownStatus() const {
+  ShutdownStatusPB result;
   int64_t shutdown_time = shutting_down_.Load();
   DCHECK_GT(shutdown_time, 0);
   int64_t shutdown_deadline = shutdown_deadline_.Load();
   DCHECK_GT(shutdown_time, 0);
   int64_t now = MonotonicMillis();
   int64_t elapsed_ms = now - shutdown_time;
-  result.grace_remaining_ms =
-      max<int64_t>(0, FLAGS_shutdown_grace_period_s * 1000 - elapsed_ms);
-  result.deadline_remaining_ms =
-      max<int64_t>(0, shutdown_deadline - now);
-  result.finstances_executing =
-      ImpaladMetrics::IMPALA_SERVER_NUM_FRAGMENTS_IN_FLIGHT->GetValue();
-  result.client_requests_registered = ImpaladMetrics::NUM_QUERIES_REGISTERED->GetValue();
-  result.backend_queries_executing =
-      ImpaladMetrics::BACKEND_NUM_QUERIES_EXECUTING->GetValue();
+  result.set_grace_remaining_ms(
+      max<int64_t>(0, FLAGS_shutdown_grace_period_s * 1000 - elapsed_ms));
+  result.set_deadline_remaining_ms(max<int64_t>(0, shutdown_deadline - now));
+  result.set_finstances_executing(
+      ImpaladMetrics::IMPALA_SERVER_NUM_FRAGMENTS_IN_FLIGHT->GetValue());
+  result.set_client_requests_registered(
+      ImpaladMetrics::NUM_QUERIES_REGISTERED->GetValue());
+  result.set_backend_queries_executing(
+      ImpaladMetrics::BACKEND_NUM_QUERIES_EXECUTING->GetValue());
   return result;
 }
 
-string ImpalaServer::ShutdownStatusToString(const TShutdownStatus& shutdown_status) {
+string ImpalaServer::ShutdownStatusToString(const ShutdownStatusPB& shutdown_status) {
   return Substitute("startup grace period left: $0, deadline left: $1, "
-      "queries registered on coordinator: $2, queries executing: $3, "
-      "fragment instances: $4",
-      PrettyPrinter::Print(shutdown_status.grace_remaining_ms, TUnit::TIME_MS),
-      PrettyPrinter::Print(shutdown_status.deadline_remaining_ms, TUnit::TIME_MS),
-      shutdown_status.client_requests_registered,
-      shutdown_status.backend_queries_executing, shutdown_status.finstances_executing);
+                    "queries registered on coordinator: $2, queries executing: $3, "
+                    "fragment instances: $4",
+      PrettyPrinter::Print(shutdown_status.grace_remaining_ms(), TUnit::TIME_MS),
+      PrettyPrinter::Print(shutdown_status.deadline_remaining_ms(), TUnit::TIME_MS),
+      shutdown_status.client_requests_registered(),
+      shutdown_status.backend_queries_executing(),
+      shutdown_status.finstances_executing());
 }
 
 Status ImpalaServer::StartShutdown(
-    int64_t relative_deadline_s, TShutdownStatus* shutdown_status) {
+    int64_t relative_deadline_s, ShutdownStatusPB* shutdown_status) {
   DCHECK_GE(relative_deadline_s, -1);
   if (relative_deadline_s == -1) relative_deadline_s = FLAGS_shutdown_deadline_s;
   int64_t now = MonotonicMillis();
@@ -2493,22 +2494,24 @@ Status ImpalaServer::StartShutdown(
   // Show the full grace/limit times to avoid showing confusing intermediate values
   // to the person running the statement.
   if (set_grace) {
-    shutdown_status->grace_remaining_ms = FLAGS_shutdown_grace_period_s * 1000L;
+    shutdown_status->set_grace_remaining_ms(FLAGS_shutdown_grace_period_s * 1000L);
+  }
+  if (set_deadline) {
+    shutdown_status->set_deadline_remaining_ms(relative_deadline_s * 1000L);
   }
-  if (set_deadline) shutdown_status->deadline_remaining_ms = relative_deadline_s * 1000L;
   return Status::OK();
 }
 
 [[noreturn]] void ImpalaServer::ShutdownThread() {
   while (true) {
     SleepForMs(1000);
-    TShutdownStatus shutdown_status = GetShutdownStatus();
+    const ShutdownStatusPB& shutdown_status = GetShutdownStatus();
     LOG(INFO) << "Shutdown status: " << ShutdownStatusToString(shutdown_status);
-    if (shutdown_status.grace_remaining_ms <= 0
-        && shutdown_status.backend_queries_executing == 0
-        && shutdown_status.client_requests_registered == 0) {
+    if (shutdown_status.grace_remaining_ms() <= 0
+        && shutdown_status.backend_queries_executing() == 0
+        && shutdown_status.client_requests_registered() == 0) {
       break;
-    } else if (shutdown_status.deadline_remaining_ms <= 0) {
+    } else if (shutdown_status.deadline_remaining_ms() <= 0) {
       break;
     }
   }
diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h
index 69c1f00..c23714b 100644
--- a/be/src/service/impala-server.h
+++ b/be/src/service/impala-server.h
@@ -409,7 +409,7 @@ class ImpalaServer : public ImpalaServiceIf,
   /// information about the pending shutdown in 'shutdown_status'. 'relative_deadline_s'
   /// is the deadline value in seconds to use, or -1 if we should use the default
   /// deadline. See Shutdown class comment for explanation of the shutdown sequence.
-  Status StartShutdown(int64_t relative_deadline_s, TShutdownStatus* shutdown_status);
+  Status StartShutdown(int64_t relative_deadline_s, ShutdownStatusPB* shutdown_status);
 
   /// Returns true if a shut down is in progress.
   bool IsShuttingDown() const { return shutting_down_.Load() != 0; }
@@ -421,10 +421,10 @@ class ImpalaServer : public ImpalaServiceIf,
 
   /// Return information about the status of a shutdown. Only valid to call if a shutdown
   /// is in progress (i.e. IsShuttingDown() is true).
-  TShutdownStatus GetShutdownStatus() const;
+  ShutdownStatusPB GetShutdownStatus() const;
 
   /// Convert the shutdown status to a human-readable string.
-  static std::string ShutdownStatusToString(const TShutdownStatus& shutdown_status);
+  static std::string ShutdownStatusToString(const ShutdownStatusPB& shutdown_status);
 
   // Mapping between query option names and levels
   QueryOptionLevels query_option_levels_;
diff --git a/common/protobuf/control_service.proto b/common/protobuf/control_service.proto
index 8e6749d..f76e143 100644
--- a/common/protobuf/control_service.proto
+++ b/common/protobuf/control_service.proto
@@ -182,6 +182,40 @@ message CancelQueryFInstancesResponsePB {
   optional StatusPB status = 1;
 }
 
+message RemoteShutdownParamsPB {
+  // Deadline for the shutdown. After this deadline expires (starting at the time when
+  // this remote shutdown command is received), the Impala daemon exits immediately
+  // regardless of whether queries are still executing.
+  optional int64 deadline_s = 1;
+}
+
+// The current status of a shutdown operation.
+message ShutdownStatusPB {
+  // Milliseconds remaining in startup grace period. 0 if the period has expired.
+  optional int64 grace_remaining_ms = 1;
+
+  // Milliseconds remaining in shutdown deadline. 0 if the deadline has expired.
+  optional int64 deadline_remaining_ms = 2;
+
+  // Number of fragment instances still executing.
+  optional int64 finstances_executing = 3;
+
+  // Number of client requests still registered with the Impala server that is being shut
+  // down.
+  optional int64 client_requests_registered = 4;
+
+  // Number of queries still executing on backend.
+  optional int64 backend_queries_executing = 5;
+}
+
+message RemoteShutdownResultPB {
+  // Success or failure of the operation.
+  optional StatusPB status = 1;
+
+  // If status is OK, additional info about the shutdown status.
+  optional ShutdownStatusPB shutdown_status = 2;
+}
+
 service ControlService {
   // Override the default authorization method.
   option (kudu.rpc.default_authz_method) = "Authorize";
@@ -195,4 +229,7 @@ service ControlService {
   // fragment instance has completely stopped executing).
   rpc CancelQueryFInstances(CancelQueryFInstancesRequestPB)
       returns (CancelQueryFInstancesResponsePB);
+
+  // Called to initiate shutdown of this backend.
+  rpc RemoteShutdown(RemoteShutdownParamsPB) returns (RemoteShutdownResultPB);
 }
\ No newline at end of file
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index 3d3e997..fd2ea6f 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -733,42 +733,6 @@ struct TPublishFilterParams {
 struct TPublishFilterResult {
 }
 
-// RemoteShutdown
-
-struct TRemoteShutdownParams {
-  // Deadline for the shutdown. After this deadline expires (starting at the time when
-  // this remote shutdown command is received), the Impala daemon exits immediately
-  // regardless of whether queries are still executing.
-  1: optional i64 deadline_s
-}
-
-// The current status of a shutdown operation.
-struct TShutdownStatus {
-  // Milliseconds remaining in startup grace period. 0 if the period has expired.
-  1: required i64 grace_remaining_ms
-
-  // Milliseconds remaining in shutdown deadline. 0 if the deadline has expired.
-  2: required i64 deadline_remaining_ms
-
-  // Number of fragment instances still executing.
-  3: required i64 finstances_executing
-
-  // Number of client requests still registered with the Impala server that is being shut
-  // down.
-  4: required i64 client_requests_registered
-
-  // Number of queries still executing on backend.
-  5: required i64 backend_queries_executing
-}
-
-struct TRemoteShutdownResult {
-  // Success or failure of the operation.
-  1: required Status.TStatus status
-
-  // If status is OK, additional info about the shutdown status
-  2: required TShutdownStatus shutdown_status
-}
-
 service ImpalaInternalService {
   // Called by coord to start asynchronous execution of a query's fragment instances in
   // backend.
@@ -782,7 +746,4 @@ service ImpalaInternalService {
   // Called by the coordinator to deliver global runtime filters to fragments for
   // application at plan nodes.
   TPublishFilterResult PublishFilter(1:TPublishFilterParams params);
-
-  // Called to initiate shutdown of this backend.
-  TRemoteShutdownResult RemoteShutdown(1:TRemoteShutdownParams params);
 }
diff --git a/tests/custom_cluster/test_restart_services.py b/tests/custom_cluster/test_restart_services.py
index 1b6911a..e1e19ba 100644
--- a/tests/custom_cluster/test_restart_services.py
+++ b/tests/custom_cluster/test_restart_services.py
@@ -35,6 +35,7 @@ from tests.hs2.hs2_test_suite import HS2TestSuite, needs_session
 
 LOG = logging.getLogger(__name__)
 
+
 class TestRestart(CustomClusterTestSuite):
   @classmethod
   def get_workload(cls):
@@ -121,33 +122,40 @@ class TestShutdownCommand(CustomClusterTestSuite, HS2TestSuite):
     # Test that a failed shut down from a bogus host or port fails gracefully.
     ex = self.execute_query_expect_failure(self.client,
         ":shutdown('e6c00ca5cd67b567eb96c6ecfb26f05')")
-    assert "Couldn't open transport" in str(ex)
+    assert "Could not find IPv4 address for:" in str(ex)
     ex = self.execute_query_expect_failure(self.client, ":shutdown('localhost:100000')")
-    assert "Couldn't open transport" in str(ex)
-    # Test that pointing to the wrong thrift service (the HS2 port) fails gracefully.
-    ex = self.execute_query_expect_failure(self.client, ":shutdown('localhost:21050')")
-    assert ("RPC Error: Client for localhost:21050 hit an unexpected exception: " +
-            "Invalid method name: 'RemoteShutdown'") in str(ex)
+    assert "Invalid port:" in str(ex)
+    assert ("This may be because the port specified is wrong.") not in str(ex)
+
+    # Test that pointing to the wrong thrift service (the HS2 port) fails gracefully-ish.
+    thrift_ports = [21051, 22001]  # HS2 port, old backend port.
+    for port in thrift_ports:
+      ex = self.execute_query_expect_failure(self.client,
+          ":shutdown('localhost:{0}')".format(port))
+      assert ("failed with error 'RemoteShutdown() RPC failed") in str(ex)
+      assert ("This may be because the port specified is wrong.") in str(ex)
+
     # Test RPC error handling with debug action.
-    ex = self.execute_query_expect_failure(self.client, ":shutdown('localhost:22001')",
+    ex = self.execute_query_expect_failure(self.client, ":shutdown('localhost:27001')",
         query_options={'debug_action': 'CRS_SHUTDOWN_RPC:FAIL'})
-    assert 'Debug Action: CRS_SHUTDOWN_RPC:FAIL' in str(ex)
+    assert 'Rpc to 127.0.0.1:27001 failed with error \'Debug Action: ' \
+        'CRS_SHUTDOWN_RPC:FAIL' in str(ex)
 
     # Test remote shutdown.
     LOG.info("Start remote shutdown {0}".format(time.time()))
-    self.execute_query_expect_success(self.client, ":shutdown('localhost:22001')",
+    self.execute_query_expect_success(self.client, ":shutdown('localhost:27001')",
         query_options={})
 
     # Remote shutdown does not require statestore.
     self.cluster.statestored.kill()
     self.cluster.statestored.wait_for_exit()
-    self.execute_query_expect_success(self.client, ":shutdown('localhost:22002')",
+    self.execute_query_expect_success(self.client, ":shutdown('localhost:27002')",
         query_options={})
 
     # Test local shutdown, which should succeed even with injected RPC error.
     LOG.info("Start local shutdown {0}".format(time.time()))
     self.execute_query_expect_success(self.client,
-        ":shutdown('{0}:22000')".format(socket.gethostname()),
+        ":shutdown('{0}:27000')".format(socket.gethostname()),
         query_options={'debug_action': 'CRS_SHUTDOWN_RPC:FAIL'})
 
     # Make sure that the impala daemons exit after the startup grace period plus a 10
@@ -207,7 +215,7 @@ class TestShutdownCommand(CustomClusterTestSuite, HS2TestSuite):
     # and only get scan ranges that don't contain the midpoint of any row group, and
     # therefore not actually produce any rows.
     SLOW_QUERY = "select count(*) from tpch.lineitem where sleep(1) = l_orderkey"
-    SHUTDOWN_EXEC2 = ": shutdown('localhost:22001')"
+    SHUTDOWN_EXEC2 = ": shutdown('localhost:27001')"
 
     # Run this query before shutdown and make sure that it executes successfully on
     # all executors through the startup grace period without disruption.
@@ -275,7 +283,7 @@ class TestShutdownCommand(CustomClusterTestSuite, HS2TestSuite):
     # Test that we can reduce the deadline after setting it to a high value.
     # Run a query that will fail as a result of the reduced deadline.
     deadline_expiry_handle = self.__exec_and_wait_until_running(SLOW_QUERY)
-    SHUTDOWN_EXEC3 = ": shutdown('localhost:22002', {0})"
+    SHUTDOWN_EXEC3 = ": shutdown('localhost:27002', {0})"
     VERY_HIGH_DEADLINE = 5000
     HIGH_DEADLINE = 1000
     LOW_DEADLINE = 5
diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py
index bbc6172..c7fa741 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -469,3 +469,21 @@ class TestWebPage(ImpalaTestSuite):
     assert 'backends' in response_json
     # When this test runs, all impalads would have already started.
     assert len(response_json['backends']) == 3
+
+    # Look at results for a single backend - they are not sorted.
+    backend_row = response_json['backends'][0]
+
+    # The 'address' column is the backend port of the impalad.
+    assert len(backend_row['address']) > 0
+    be_ports = ('22000', '22001', '22002')
+    assert backend_row['address'].endswith(be_ports)
+
+    # The 'krpc_address' is the krpc address of the impalad.
+    assert len(backend_row['krpc_address']) > 0
+    krpc_ports = ('27000', '27001', '27002')
+    assert backend_row['krpc_address'].endswith(krpc_ports)
+
+    assert backend_row['is_coordinator']
+    assert backend_row['is_executor']
+    assert not backend_row['is_quiescing']
+    assert len(backend_row['admit_mem_limit']) > 0
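
For reference, the fields asserted above can also be inspected outside the
test suite by fetching the debug webserver's JSON endpoint directly. A minimal
sketch, assuming a local impalad on the default debug webserver port (25000)
and the 'requests' package:

    import json

    import requests

    # Fetch the /backends debug page as JSON, as the test above does.
    resp = requests.get('http://localhost:25000/backends?json')
    resp.raise_for_status()
    for be in json.loads(resp.text)['backends']:
        # 'address' is the backend thrift endpoint; 'krpc_address' is the
        # KRPC endpoint newly surfaced by this change.
        print(be['address'], be['krpc_address'], be['is_quiescing'])
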
diff --git a/www/backends.tmpl b/www/backends.tmpl
index c80d25a..d29f4a1 100644
--- a/www/backends.tmpl
+++ b/www/backends.tmpl
@@ -24,6 +24,7 @@ under the License.
   <thead>
     <tr>
       <th>Address</th>
+      <th>Krpc Address</th>
       <th>Coordinator</th>
       <th>Executor</th>
       <th>Quiescing</th>
@@ -36,6 +37,7 @@ under the License.
     {{#backends}}
     <tr>
       <td>{{address}}</td>
+      <td>{{krpc_address}}</td>
       <td>{{is_coordinator}}</td>
       <td>{{is_executor}}</td>
       <td>{{is_quiescing}}</td>


[impala] 05/09: IMPALA-5872: Testcase builder for query planner

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f7df8adfaee1e824d9e32271e0e564cd33a37d70
Author: Bharath Vissapragada <bh...@cloudera.com>
AuthorDate: Wed Jan 9 22:50:56 2019 -0800

    IMPALA-5872: Testcase builder for query planner
    
    Implements a new testcase builder for simulating query plans
    from one cluster on a different cluster/minicluster with
    a different number of nodes. The testcase is collected from one
    cluster and can be replayed on any other cluster. It includes
    all the information that is needed to replay the query plan
    exactly as in the source cluster.
    
    Also adds a stand-alone tool (PlannerTestCaseLoader) that can
    replay the testcase without having to start an actual cluster
    or a dev minicluster. This is done to make testcase debugging
    simpler.
    
    Motivation:
    ----------
    - Make query planner issues easily reproducible
    - Improve user experience while collecting query diagnostics
    - Make it easy to test new planner features by exercising them on
      customer use cases collected from much larger clusters.
    
    Commands:
    --------
    -- Collect testcase for a query stmt (outputs the testcase file path).
    impala-shell> COPY TESTCASE TO <hdfs dirpath> <query stmt>
    
    -- Load the testcase metadata in a target cluster (dumps the query stmt)
    impala-shell> COPY TESTCASE FROM <hdfs testcase file path>
    -- Replay the query plan
    impala-shell> SET PLANNER_TESTCASE_MODE=true
    impala-shell> EXPLAIN <query stmt>
    
    How it works:
    ------------
    - During export on the source cluster, the command dumps all the thrift
      states of referenced objects in the query into a gzipped binary file.
    - During replay on a target cluster, it adds these objects to the catalog
      cache by faking them as DDLs.
    - The planner also fakes the number of hosts by using the scan range
      information from the target cluster. (A sketch for decoding a testcase
      file offline follows this commit message.)
    
    Caveats:
    ------
    - Tested to work with HDFS tables. Tables based on other filesystems like
      HBase/Kudu may not work as desired.
    - The tool does not collect actual data files for the tables. Only the
      metadata state is dumped.
    - Currently only imports databases/tables/views. We can extend it to
      work for UDFs etc.
    - It only works for QueryStmts (select/union queries).
    - On a Sentry-enabled cluster, the role running the query requires the
      VIEW_METADATA privilege on every db/table/view referenced in the query
      statement.
    - Once the metadata dump is loaded on a target cluster, the state is
      volatile. Hence it does not survive a cluster restart or an
      INVALIDATE METADATA.
    - Loading a testcase requires setting the query option (SET
      PLANNER_TESTCASE_MODE=true) so that the planner knows to fake the
      number of hosts. Otherwise it takes into account the local cluster
      topology.
    - Cross version compatibility of testcases needs some thought. For
      example, creating a testcase from Impala version 3.2 and trying to
      replay it on Impala version 3.5. This could be problematic if we don't
      keep the underlying thrift structures backward compatible.
    
    Change-Id: Iec83eeb2dc5136768b70ed581fb8d3ed0335cb52
    Reviewed-on: http://gerrit.cloudera.org:8080/12221
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
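
As noted in the commit message, the testcase file holds a compressed,
binary-serialized TTestCaseData struct. A minimal sketch for decoding one
offline, assuming Impala's generated Python Thrift bindings are importable
(the module path and file name below are assumptions) and that the payload is
zlib/deflate-compressed as in CompressionUtil.deflateCompress:

    import zlib

    from thrift.protocol import TBinaryProtocol
    from thrift.transport import TTransport
    # Assumed module path for the generated bindings of Frontend.thrift.
    from Frontend.ttypes import TTestCaseData

    def read_testcase(path):
        # The file is a deflate-compressed, binary-serialized TTestCaseData.
        with open(path, 'rb') as f:
            raw = zlib.decompress(f.read())
        data = TTestCaseData()
        data.read(TBinaryProtocol.TBinaryProtocol(TTransport.TMemoryBuffer(raw)))
        return data

    # Hypothetical file name; COPY TESTCASE TO prints the actual path.
    tc = read_testcase('impala-testcase-data-1234')
    print(tc.query_stmt, tc.impala_version)
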
---
 be/src/service/client-request-state.cc             |   5 +
 be/src/service/query-options.cc                    |   4 +
 be/src/service/query-options.h                     |   4 +-
 be/src/util/backend-gflag-util.cc                  |   2 +
 bin/rat_exclude_files.txt                          |   1 +
 common/thrift/BackendGflags.thrift                 |   2 +
 common/thrift/CatalogService.thrift                |   3 +
 common/thrift/Frontend.thrift                      |  27 +++
 common/thrift/ImpalaInternalService.thrift         |   4 +-
 common/thrift/ImpalaService.thrift                 |   5 +
 common/thrift/JniCatalog.thrift                    |   8 +-
 common/thrift/Types.thrift                         |   1 +
 fe/src/main/cup/sql-parser.cup                     |  72 ++++---
 .../apache/impala/analysis/AnalysisContext.java    |   9 +-
 .../apache/impala/analysis/CopyTestCaseStmt.java   | 210 ++++++++++++++++++++
 .../java/org/apache/impala/analysis/HdfsUri.java   |  25 ++-
 .../java/org/apache/impala/analysis/QueryStmt.java |  13 ++
 .../org/apache/impala/analysis/SelectStmt.java     |  29 ++-
 .../org/apache/impala/analysis/StmtRewriter.java   |   2 +
 .../java/org/apache/impala/catalog/Catalog.java    |  28 ++-
 .../impala/catalog/CatalogServiceCatalog.java      |  48 ++++-
 .../main/java/org/apache/impala/catalog/FeDb.java  |   9 +
 .../java/org/apache/impala/catalog/FeTable.java    |   9 +
 .../apache/impala/catalog/MetaStoreClientPool.java |   6 +
 .../org/apache/impala/common/FileSystemUtil.java   |  16 ++
 .../java/org/apache/impala/common/JniUtil.java     |  19 ++
 .../org/apache/impala/planner/HdfsScanNode.java    |  92 +++++----
 .../java/org/apache/impala/planner/Planner.java    |   6 +
 .../org/apache/impala/service/BackendConfig.java   |   1 +
 .../apache/impala/service/CatalogOpExecutor.java   | 103 ++++++++++
 .../java/org/apache/impala/service/Frontend.java   |  31 ++-
 fe/src/main/jflex/sql-scanner.flex                 |   1 +
 .../org/apache/impala/analysis/AnalyzerTest.java   |  19 ++
 .../impala/analysis/AuthorizationStmtTest.java     |  93 +++++++++
 .../org/apache/impala/analysis/ParserTest.java     |  28 ++-
 .../apache/impala/planner/TestCaseLoaderTest.java  |  75 +++++++
 .../impala/testutil/CatalogServiceTestCatalog.java |  33 +++-
 .../testutil/EmbeddedMetastoreClientPool.java      |  76 +++++++
 .../apache/impala/testutil/ImpaladTestCatalog.java |  46 ++++-
 .../impala/testutil/PlannerTestCaseLoader.java     |  97 +++++++++
 testdata/bin/create-load-data.sh                   |   3 +
 testdata/bin/create-tpcds-testcase-files.sh        |  47 +++++
 .../workloads/tpcds/queries/raw/tpcds-query1.sql   |  25 +++
 .../workloads/tpcds/queries/raw/tpcds-query11.sql  |  81 ++++++++
 .../workloads/tpcds/queries/raw/tpcds-query12.sql  |  34 ++++
 .../workloads/tpcds/queries/raw/tpcds-query13.sql  |  52 +++++
 .../workloads/tpcds/queries/raw/tpcds-query15.sql  |  20 ++
 .../workloads/tpcds/queries/raw/tpcds-query16.sql  |  31 +++
 .../workloads/tpcds/queries/raw/tpcds-query17.sql  |  45 +++++
 .../workloads/tpcds/queries/raw/tpcds-query19.sql  |  25 +++
 .../workloads/tpcds/queries/raw/tpcds-query2.sql   |  60 ++++++
 .../workloads/tpcds/queries/raw/tpcds-query20.sql  |  30 +++
 .../workloads/tpcds/queries/raw/tpcds-query21.sql  |  30 +++
 .../workloads/tpcds/queries/raw/tpcds-query25.sql  |  48 +++++
 .../workloads/tpcds/queries/raw/tpcds-query26.sql  |  21 ++
 .../workloads/tpcds/queries/raw/tpcds-query28.sql  |  53 +++++
 .../workloads/tpcds/queries/raw/tpcds-query29.sql  |  47 +++++
 .../workloads/tpcds/queries/raw/tpcds-query3.sql   |  21 ++
 .../workloads/tpcds/queries/raw/tpcds-query30.sql  |  31 +++
 .../workloads/tpcds/queries/raw/tpcds-query31.sql  |  52 +++++
 .../workloads/tpcds/queries/raw/tpcds-query32.sql  |  28 +++
 .../workloads/tpcds/queries/raw/tpcds-query33.sql  |  75 +++++++
 .../workloads/tpcds/queries/raw/tpcds-query34.sql  |  31 +++
 .../workloads/tpcds/queries/raw/tpcds-query37.sql  |  17 ++
 .../workloads/tpcds/queries/raw/tpcds-query39.sql  |  54 +++++
 .../workloads/tpcds/queries/raw/tpcds-query4.sql   | 116 +++++++++++
 .../workloads/tpcds/queries/raw/tpcds-query40.sql  |  28 +++
 .../workloads/tpcds/queries/raw/tpcds-query42.sql  |  22 +++
 .../workloads/tpcds/queries/raw/tpcds-query43.sql  |  19 ++
 .../workloads/tpcds/queries/raw/tpcds-query46.sql  |  35 ++++
 .../workloads/tpcds/queries/raw/tpcds-query47.sql  |  51 +++++
 .../workloads/tpcds/queries/raw/tpcds-query48.sql  |  67 +++++++
 .../workloads/tpcds/queries/raw/tpcds-query49.sql  | 127 ++++++++++++
 .../workloads/tpcds/queries/raw/tpcds-query50.sql  |  59 ++++++
 .../workloads/tpcds/queries/raw/tpcds-query51.sql  |  45 +++++
 .../workloads/tpcds/queries/raw/tpcds-query52.sql  |  22 +++
 .../workloads/tpcds/queries/raw/tpcds-query53.sql  |  28 +++
 .../workloads/tpcds/queries/raw/tpcds-query55.sql  |  14 ++
 .../workloads/tpcds/queries/raw/tpcds-query56.sql  |  69 +++++++
 .../workloads/tpcds/queries/raw/tpcds-query57.sql  |  48 +++++
 .../workloads/tpcds/queries/raw/tpcds-query58.sql  |  65 ++++++
 .../workloads/tpcds/queries/raw/tpcds-query59.sql  |  44 +++++
 .../workloads/tpcds/queries/raw/tpcds-query6.sql   |  26 +++
 .../workloads/tpcds/queries/raw/tpcds-query60.sql  |  78 ++++++++
 .../workloads/tpcds/queries/raw/tpcds-query61.sql  |  44 +++++
 .../workloads/tpcds/queries/raw/tpcds-query62.sql  |  35 ++++
 .../workloads/tpcds/queries/raw/tpcds-query63.sql  |  29 +++
 .../workloads/tpcds/queries/raw/tpcds-query64.sql  | 120 +++++++++++
 .../workloads/tpcds/queries/raw/tpcds-query65.sql  |  29 +++
 .../workloads/tpcds/queries/raw/tpcds-query66.sql  | 220 +++++++++++++++++++++
 .../workloads/tpcds/queries/raw/tpcds-query68.sql  |  42 ++++
 .../workloads/tpcds/queries/raw/tpcds-query69.sql  |  47 +++++
 .../workloads/tpcds/queries/raw/tpcds-query7.sql   |  21 ++
 .../workloads/tpcds/queries/raw/tpcds-query71.sql  |  40 ++++
 .../workloads/tpcds/queries/raw/tpcds-query72.sql  |  29 +++
 .../workloads/tpcds/queries/raw/tpcds-query73.sql  |  28 +++
 .../workloads/tpcds/queries/raw/tpcds-query74.sql  |  61 ++++++
 .../workloads/tpcds/queries/raw/tpcds-query75.sql  |  70 +++++++
 .../workloads/tpcds/queries/raw/tpcds-query76.sql  |  24 +++
 .../workloads/tpcds/queries/raw/tpcds-query78.sql  |  58 ++++++
 .../workloads/tpcds/queries/raw/tpcds-query79.sql  |  23 +++
 .../workloads/tpcds/queries/raw/tpcds-query81.sql  |  31 +++
 .../workloads/tpcds/queries/raw/tpcds-query82.sql  |  17 ++
 .../workloads/tpcds/queries/raw/tpcds-query83.sql  |  67 +++++++
 .../workloads/tpcds/queries/raw/tpcds-query84.sql  |  21 ++
 .../workloads/tpcds/queries/raw/tpcds-query88.sql  |  94 +++++++++
 .../workloads/tpcds/queries/raw/tpcds-query89.sql  |  28 +++
 .../workloads/tpcds/queries/raw/tpcds-query90.sql  |  22 +++
 .../workloads/tpcds/queries/raw/tpcds-query91.sql  |  31 +++
 .../workloads/tpcds/queries/raw/tpcds-query92.sql  |  30 +++
 .../workloads/tpcds/queries/raw/tpcds-query94.sql  |  29 +++
 .../workloads/tpcds/queries/raw/tpcds-query95.sql  |  32 +++
 .../workloads/tpcds/queries/raw/tpcds-query96.sql  |  16 ++
 .../workloads/tpcds/queries/raw/tpcds-query97.sql  |  25 +++
 .../workloads/tpcds/queries/raw/tpcds-query98.sql  |  33 ++++
 .../workloads/tpcds/queries/raw/tpcds-query99.sql  |  35 ++++
 116 files changed, 4487 insertions(+), 130 deletions(-)

diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc
index d8ce417..bac6c1c 100644
--- a/be/src/service/client-request-state.cc
+++ b/be/src/service/client-request-state.cc
@@ -176,6 +176,11 @@ Status ClientRequestState::Exec(TExecRequest* exec_request) {
           exec_request_.explain_result.results));
       break;
     }
+    case TStmtType::TESTCASE: {
+      DCHECK(exec_request_.__isset.testcase_data_path);
+      SetResultSet(vector<string>(1, exec_request_.testcase_data_path));
+      break;
+    }
     case TStmtType::DDL: {
       DCHECK(exec_request_.__isset.catalog_op_request);
       RETURN_IF_ERROR(ExecDdlRequest());
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 74c178b..5720eac 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -734,6 +734,10 @@ Status impala::SetQueryOption(const string& key, const string& value,
               value));
         }
         query_options->__set_resource_trace_ratio(val);
+        break;
+      }
+      case TImpalaQueryOptions::PLANNER_TESTCASE_MODE: {
+        query_options->__set_planner_testcase_mode(
+            iequals(value, "true") || iequals(value, "1"));
         break;
       }
       case TImpalaQueryOptions::NUM_REMOTE_EXECUTOR_CANDIDATES: {
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 7ad1ca0..7fee4d1 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -41,7 +41,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
 // the DCHECK.
 #define QUERY_OPTS_TABLE\
   DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\
-      TImpalaQueryOptions::NUM_ROWS_PRODUCED_LIMIT + 1);\
+      TImpalaQueryOptions::PLANNER_TESTCASE_MODE + 1);\
   REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\
   QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\
   REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\
@@ -150,6 +150,8 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
       TQueryOptionLevel::ADVANCED)\
   QUERY_OPT_FN(num_rows_produced_limit, NUM_ROWS_PRODUCED_LIMIT,\
       TQueryOptionLevel::ADVANCED)\
+  QUERY_OPT_FN(\
+      planner_testcase_mode, PLANNER_TESTCASE_MODE, TQueryOptionLevel::DEVELOPMENT)
   ;
 
 /// Enforce practical limits on some query options to avoid undesired query state.
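
With the option registered above, planner_testcase_mode can be toggled like
any other query option. A minimal sketch of exercising it through the Python
test framework used elsewhere in this patch (the test class and query are
stand-ins):

    from tests.common.impala_test_suite import ImpalaTestSuite

    class TestPlannerTestcaseMode(ImpalaTestSuite):
      def test_replay_with_testcase_mode(self):
        # Replay a previously loaded testcase plan with the planner faking the
        # source cluster's host count, per the commit message above.
        self.execute_query_expect_success(self.client,
            "explain select count(*) from tpch.lineitem",
            query_options={'planner_testcase_mode': 'true'})
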
diff --git a/be/src/util/backend-gflag-util.cc b/be/src/util/backend-gflag-util.cc
index b02c70a..0490288 100644
--- a/be/src/util/backend-gflag-util.cc
+++ b/be/src/util/backend-gflag-util.cc
@@ -17,6 +17,7 @@
 
 #include "common/global-flags.h"
 
+#include "common/version.h"
 #include "gen-cpp/BackendGflags_types.h"
 #include "rpc/jni-thrift-util.h"
 #include "util/backend-gflag-util.h"
@@ -141,6 +142,7 @@ Status GetThriftBackendGflags(JNIEnv* jni_env, jbyteArray* cfg_bytes) {
   cfg.__set_kudu_mutation_buffer_size(FLAGS_kudu_mutation_buffer_size);
   cfg.__set_kudu_error_buffer_size(FLAGS_kudu_error_buffer_size);
   cfg.__set_hms_event_polling_interval_s(FLAGS_hms_event_polling_interval_s);
+  cfg.__set_impala_build_version(::GetDaemonBuildVersion());
   RETURN_IF_ERROR(SerializeThriftMsg(jni_env, &cfg, cfg_bytes));
   return Status::OK();
 }
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index 5809da2..df20bbf 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -95,6 +95,7 @@ lib/python/impala_py_lib/gdb/README.md
 # http://www.apache.org/legal/src-headers.html: "Test data for which the addition of a
 # source header would cause the tests to fail."
 testdata/*.csv
+testdata/*.sql
 testdata/*.test
 be/src/testutil/*.pem
 *.json
diff --git a/common/thrift/BackendGflags.thrift b/common/thrift/BackendGflags.thrift
index fe724c2..ab714a5 100644
--- a/common/thrift/BackendGflags.thrift
+++ b/common/thrift/BackendGflags.thrift
@@ -115,4 +115,6 @@ struct TBackendGflags {
   44: required i32 kudu_error_buffer_size
 
   45: required i32 hms_event_polling_interval_s
+
+  46: required string impala_build_version
 }
diff --git a/common/thrift/CatalogService.thrift b/common/thrift/CatalogService.thrift
index 9add6a6..0f186a2 100644
--- a/common/thrift/CatalogService.thrift
+++ b/common/thrift/CatalogService.thrift
@@ -145,6 +145,9 @@ struct TDdlExecRequest {
 
   // Parameters for ALTER DATABASE
   24: optional JniCatalog.TAlterDbParams alter_db_params
+
+  // Parameters for replaying an exported testcase.
+  25: optional JniCatalog.TCopyTestCaseReq copy_test_case_params
 }
 
 // Response from executing a TDdlExecRequest
diff --git a/common/thrift/Frontend.thrift b/common/thrift/Frontend.thrift
index ffbfc07..5670bb6 100644
--- a/common/thrift/Frontend.thrift
+++ b/common/thrift/Frontend.thrift
@@ -647,6 +647,9 @@ struct TExecRequest {
 
   // Profile information from the planning process.
   14: optional RuntimeProfile.TRuntimeProfileNode profile
+
+  // Set iff stmt_type is TESTCASE
+  15: optional string testcase_data_path
 }
 
 // Parameters to FeSupport.cacheJar().
@@ -905,3 +908,27 @@ struct TBuildTestDescriptorTableParams {
   // Every entry describes the slot types of one tuple.
   1: required list<list<Types.TColumnType>> slot_types
 }
+
+// Output format for generating a testcase for a given query_stmt. The resulting bytes
+// are compressed before being written to a file.
+// TODO: Add the EXPLAIN string from the source cluster on which the testcase was
+// collected.
+struct TTestCaseData {
+  // Query statement for which this test case data is generated.
+  1: required string query_stmt
+
+  // All referenced table and view defs.
+  2: optional list<CatalogObjects.TTable> tables_and_views
+
+  // All databases referenced in the query.
+  3: optional list<CatalogObjects.TDatabase> dbs
+
+  // Output directory path specified in the COPY TESTCASE TO statement.
+  4: required string testcase_data_path
+
+  // Impala version that was used to generate this testcase.
+  // TODO: How to deal with version incompatibilities? E.g: A testcase collected on
+  // Impala version v1 may or may not be compatible to Impala version v2 if the
+  // underlying thrift layout changes.
+  5: required string impala_version
+}
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index fd2ea6f..7ae5bef 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -312,7 +312,6 @@ struct TQueryOptions {
   // See comment in ImpalaService.thrift
   74: optional string client_identifier;
 
-  // See comment in ImpalaService.thrift
   75: optional double resource_trace_ratio = 0;
 
   // See comment in ImpalaService.thrift.
@@ -321,6 +320,9 @@ struct TQueryOptions {
 
   // See comment in ImpalaService.thrift.
   77: optional i64 num_rows_produced_limit = 0;
+
+  // See comment in ImpalaService.thrift
+  78: optional bool planner_testcase_mode = false;
 }
 
 // Impala currently has two types of sessions: Beeswax and HiveServer2
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index 8d5f872..0d89a12 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -372,6 +372,11 @@ enum TImpalaQueryOptions {
   // canceled if the query is still executing after this limit is hit. A value
   // of 0 means there is no limit on the number of rows produced.
   NUM_ROWS_PRODUCED_LIMIT
+
+  // Set when attempting to load a planner testcase. Typically used by developers for
+  // debugging a testcase. Should not be set in user clusters. If set, a warning
+  // is emitted in the query runtime profile.
+  PLANNER_TESTCASE_MODE
 }
 
 // The summary of a DML statement.
diff --git a/common/thrift/JniCatalog.thrift b/common/thrift/JniCatalog.thrift
index 2d97f43..6714d48 100644
--- a/common/thrift/JniCatalog.thrift
+++ b/common/thrift/JniCatalog.thrift
@@ -53,7 +53,8 @@ enum TDdlType {
   REVOKE_PRIVILEGE,
   TRUNCATE_TABLE,
   COMMENT_ON,
-  ALTER_DATABASE
+  ALTER_DATABASE,
+  COPY_TESTCASE
 }
 
 enum TOwnerType {
@@ -748,3 +749,8 @@ struct TGetCatalogServerMetricsResponse {
   // Partial fetch RPC queue length.
   1: required i32 catalog_partial_fetch_rpc_queue_len
 }
+
+// Request to copy the generated testcase from a given input path.
+struct TCopyTestCaseReq {
+  1: required string input_path
+}
diff --git a/common/thrift/Types.thrift b/common/thrift/Types.thrift
index 0d304ab..d718452 100644
--- a/common/thrift/Types.thrift
+++ b/common/thrift/Types.thrift
@@ -100,6 +100,7 @@ enum TStmtType {
   DDL, // Data definition, e.g. CREATE TABLE (includes read-only functions e.g. SHOW)
   DML, // Data modification e.g. INSERT
   EXPLAIN,
+  TESTCASE, // For generating a testcase for QueryStmts.
   LOAD, // Statement type for LOAD commands
   SET,
   ADMIN_FN // Admin function, e.g. ": shutdown()".
diff --git a/fe/src/main/cup/sql-parser.cup b/fe/src/main/cup/sql-parser.cup
index 051bb48..9d80ced 100644
--- a/fe/src/main/cup/sql-parser.cup
+++ b/fe/src/main/cup/sql-parser.cup
@@ -276,28 +276,28 @@ terminal
   KW_ARRAY, KW_AS, KW_ASC, KW_AUTHORIZATION, KW_AVRO, KW_BETWEEN, KW_BIGINT, KW_BINARY,
   KW_BLOCKSIZE, KW_BOOLEAN, KW_BY, KW_CACHED, KW_CASCADE, KW_CASE, KW_CAST, KW_CHANGE,
   KW_CHAR, KW_CLASS, KW_CLOSE_FN, KW_COLUMN, KW_COLUMNS, KW_COMMENT, KW_COMPRESSION,
-  KW_COMPUTE, KW_CREATE, KW_CROSS, KW_CURRENT, KW_DATA, KW_DATABASE, KW_DATABASES,
-  KW_DATE, KW_DATETIME, KW_DECIMAL, KW_DEFAULT, KW_DELETE, KW_DELIMITED, KW_DESC,
-  KW_DESCRIBE, KW_DISTINCT, KW_DIV, KW_DOUBLE, KW_DROP, KW_ELSE,
-  KW_ENCODING, KW_END, KW_ESCAPED, KW_EXISTS, KW_EXPLAIN, KW_EXTENDED, KW_EXTERNAL,
-  KW_FALSE, KW_FIELDS, KW_FILEFORMAT, KW_FILES, KW_FINALIZE_FN, KW_FIRST, KW_FLOAT,
-  KW_FOLLOWING, KW_FOR, KW_FORMAT, KW_FORMATTED, KW_FROM, KW_FULL, KW_FUNCTION,
-  KW_FUNCTIONS, KW_GRANT, KW_GROUP, KW_HASH, KW_IGNORE, KW_HAVING, KW_IF, KW_ILIKE,
-  KW_IN, KW_INCREMENTAL, KW_INIT_FN, KW_INNER, KW_INPATH, KW_INSERT, KW_INT,
-  KW_INTERMEDIATE, KW_INTERVAL, KW_INTO, KW_INVALIDATE, KW_IREGEXP, KW_IS, KW_JOIN,
-  KW_KUDU, KW_LAST, KW_LEFT, KW_LIKE, KW_LIMIT, KW_LINES, KW_LOAD, KW_LOCATION, KW_MAP,
-  KW_MERGE_FN, KW_METADATA, KW_NOT, KW_NULL, KW_NULLS, KW_OFFSET, KW_ON, KW_OR, KW_ORC,
-  KW_ORDER, KW_OUTER, KW_OVER, KW_OVERWRITE, KW_PARQUET, KW_PARQUETFILE, KW_PARTITION,
-  KW_PARTITIONED, KW_PARTITIONS, KW_PRECEDING, KW_PREPARE_FN, KW_PRIMARY, KW_PRODUCED,
-  KW_PURGE, KW_RANGE, KW_RCFILE, KW_RECOVER, KW_REFRESH, KW_REGEXP, KW_RENAME,
-  KW_REPEATABLE, KW_REPLACE, KW_REPLICATION, KW_RESTRICT, KW_RETURNS, KW_REVOKE,
-  KW_RIGHT, KW_RLIKE, KW_ROLE, KW_ROLES, KW_ROW, KW_ROWS, KW_SCHEMA, KW_SCHEMAS,
-  KW_SELECT, KW_SEMI, KW_SEQUENCEFILE, KW_SERDEPROPERTIES, KW_SERIALIZE_FN, KW_SET,
-  KW_SHOW, KW_SMALLINT, KW_SORT, KW_STORED, KW_STRAIGHT_JOIN, KW_STRING, KW_STRUCT,
-  KW_SYMBOL, KW_TABLE, KW_TABLES, KW_TABLESAMPLE, KW_TBLPROPERTIES, KW_TERMINATED,
-  KW_TEXTFILE, KW_THEN, KW_TIMESTAMP, KW_TINYINT, KW_TRUNCATE, KW_STATS, KW_TO, KW_TRUE,
-  KW_UNBOUNDED, KW_UNCACHED, KW_UNION, KW_UNKNOWN, KW_UPDATE, KW_UPDATE_FN, KW_UPSERT,
-  KW_USE, KW_USING, KW_VALUES, KW_VARCHAR, KW_VIEW, KW_WHEN, KW_WHERE, KW_WITH;
+  KW_COMPUTE, KW_COPY, KW_CREATE, KW_CROSS, KW_CURRENT, KW_DATA, KW_DATABASE,
+  KW_DATABASES, KW_DATE, KW_DATETIME, KW_DECIMAL, KW_DEFAULT, KW_DELETE, KW_DELIMITED,
+  KW_DESC, KW_DESCRIBE, KW_DISTINCT, KW_DIV, KW_DOUBLE, KW_DROP, KW_ELSE, KW_ENCODING,
+  KW_END, KW_ESCAPED, KW_EXISTS, KW_EXPLAIN, KW_EXTENDED, KW_EXTERNAL, KW_FALSE,
+  KW_FIELDS, KW_FILEFORMAT, KW_FILES, KW_FINALIZE_FN, KW_FIRST, KW_FLOAT, KW_FOLLOWING,
+  KW_FOR, KW_FORMAT, KW_FORMATTED, KW_FROM, KW_FULL, KW_FUNCTION, KW_FUNCTIONS, KW_GRANT,
+  KW_GROUP, KW_HASH, KW_IGNORE, KW_HAVING, KW_IF, KW_ILIKE, KW_IN, KW_INCREMENTAL,
+  KW_INIT_FN, KW_INNER, KW_INPATH, KW_INSERT, KW_INT, KW_INTERMEDIATE, KW_INTERVAL,
+  KW_INTO, KW_INVALIDATE, KW_IREGEXP, KW_IS, KW_JOIN, KW_KUDU, KW_LAST, KW_LEFT, KW_LIKE,
+  KW_LIMIT, KW_LINES, KW_LOAD, KW_LOCATION, KW_MAP, KW_MERGE_FN, KW_METADATA, KW_NOT,
+  KW_NULL, KW_NULLS, KW_OFFSET, KW_ON, KW_OR, KW_ORC, KW_ORDER, KW_OUTER, KW_OVER,
+  KW_OVERWRITE, KW_PARQUET, KW_PARQUETFILE, KW_PARTITION, KW_PARTITIONED, KW_PARTITIONS,
+  KW_PRECEDING, KW_PREPARE_FN, KW_PRIMARY, KW_PRODUCED, KW_PURGE, KW_RANGE, KW_RCFILE,
+  KW_RECOVER, KW_REFRESH, KW_REGEXP, KW_RENAME, KW_REPEATABLE, KW_REPLACE, KW_REPLICATION,
+  KW_RESTRICT, KW_RETURNS, KW_REVOKE, KW_RIGHT, KW_RLIKE, KW_ROLE, KW_ROLES, KW_ROW,
+  KW_ROWS, KW_SCHEMA, KW_SCHEMAS, KW_SELECT, KW_SEMI, KW_SEQUENCEFILE, KW_SERDEPROPERTIES,
+  KW_SERIALIZE_FN, KW_SET, KW_SHOW, KW_SMALLINT, KW_SORT, KW_STORED, KW_STRAIGHT_JOIN,
+  KW_STRING, KW_STRUCT, KW_SYMBOL, KW_TABLE, KW_TABLES, KW_TABLESAMPLE, KW_TBLPROPERTIES,
+  KW_TERMINATED, KW_TEXTFILE, KW_THEN, KW_TIMESTAMP, KW_TINYINT, KW_TRUNCATE, KW_STATS,
+  KW_TO, KW_TRUE, KW_UNBOUNDED, KW_UNCACHED, KW_UNION, KW_UNKNOWN, KW_UPDATE,
+  KW_UPDATE_FN, KW_UPSERT, KW_USE, KW_USING, KW_VALUES, KW_VARCHAR, KW_VIEW, KW_WHEN,
+  KW_WHERE, KW_WITH;
 
 terminal UNUSED_RESERVED_WORD;
 
@@ -532,6 +532,7 @@ nonterminal Boolean server_ident;
 nonterminal Boolean source_ident;
 nonterminal Boolean sources_ident;
 nonterminal Boolean uri_ident;
+nonterminal testcase_ident;
 
 // For Create/Drop/Show function ddl
 nonterminal FunctionArgs function_def_args;
@@ -548,6 +549,9 @@ nonterminal ShowFunctionsStmt show_functions_stmt;
 nonterminal DropFunctionStmt drop_function_stmt;
 nonterminal TFunctionCategory opt_function_category;
 
+// Query testcase export/load
+nonterminal CopyTestCaseStmt copy_testcase_stmt;
+
 // Admin statements.
 nonterminal AdminFnStmt admin_fn_stmt;
 
@@ -633,6 +637,8 @@ stmt ::=
   {: RESULT = alter_view; :}
   | compute_stats_stmt:compute_stats
   {: RESULT = compute_stats; :}
+  | copy_testcase_stmt:copy_testcase
+  {: RESULT = copy_testcase; :}
   | drop_stats_stmt:drop_stats
   {: RESULT = drop_stats; :}
   | create_tbl_as_select_stmt:create_tbl_as_select
@@ -759,6 +765,17 @@ explain_stmt ::=
   :}
   ;
 
+copy_testcase_stmt ::=
+  KW_COPY testcase_ident:testcase KW_TO STRING_LITERAL:path query_stmt:query
+  {:
+    RESULT = CopyTestCaseStmt.to(query, new HdfsUri(path));
+  :}
+  | KW_COPY testcase_ident:testcase KW_FROM STRING_LITERAL:path
+  {:
+    RESULT = CopyTestCaseStmt.from(new HdfsUri(path));
+  :}
+  ;
+
 // Insert statements have two optional clauses: the column permutation (INSERT into
 // tbl(col1,...) etc) and the PARTITION clause. If the column permutation is present, the
 // query statement clause is optional as well.
@@ -1884,6 +1901,15 @@ server_ident ::=
   :}
   ;
 
+testcase_ident ::=
+  IDENT:ident
+  {:
+    if (!ident.toUpperCase().equals("TESTCASE")) {
+      parser.parseError("identifier", SqlParserSymbols.IDENT, "TESTCASE");
+    }
+  :}
+  ;
+
 option_ident ::=
   IDENT:ident
   {:
@@ -3478,6 +3504,8 @@ word ::=
   {: RESULT = r.toString(); :}
   | KW_COMPUTE:r
   {: RESULT = r.toString(); :}
+  | KW_COPY:r
+  {: RESULT = r.toString(); :}
   | KW_CREATE:r
   {: RESULT = r.toString(); :}
   | KW_CROSS:r
diff --git a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
index 3d32999..ecd7ffd 100644
--- a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
+++ b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
@@ -163,6 +163,8 @@ public class AnalysisContext {
       return isUseStmt() || isViewMetadataStmt() || isDdlStmt();
     }
 
+    public boolean isTestCaseStmt() { return stmt_ instanceof CopyTestCaseStmt; }
+
     private boolean isDdlStmt() {
       return isCreateTableLikeStmt() || isCreateTableStmt() ||
           isCreateViewStmt() || isCreateDbStmt() || isDropDbStmt() ||
@@ -195,7 +197,8 @@ public class AnalysisContext {
      */
     public boolean isHierarchicalAuthStmt() {
       return isQueryStmt() || isInsertStmt() || isUpdateStmt() || isDeleteStmt()
-          || isCreateTableAsSelectStmt() || isCreateViewStmt() || isAlterViewStmt();
+          || isCreateTableAsSelectStmt() || isCreateViewStmt() || isAlterViewStmt()
+          || isTestCaseStmt();
     }
 
     /**
@@ -482,9 +485,13 @@ public class AnalysisContext {
 
     // Re-analyze the stmt with a new analyzer.
     analysisResult_.analyzer_ = createAnalyzer(stmtTableCache);
+    // We restore the privileges collected in the first pass below. So, no point in
+    // collecting them again.
+    analysisResult_.analyzer_.setEnablePrivChecks(false);
     analysisResult_.stmt_.reset();
     try {
       analysisResult_.stmt_.analyze(analysisResult_.analyzer_);
+      analysisResult_.analyzer_.setEnablePrivChecks(true); // restore
     } catch (AnalysisException e) {
       LOG.error(String.format("Error analyzing the rewritten query.\n" +
           "Original SQL: %s\nRewritten SQL: %s", analysisResult_.stmt_.toSql(),
diff --git a/fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java b/fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java
new file mode 100644
index 0000000..c503cc5
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java
@@ -0,0 +1,210 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.analysis;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.impala.authorization.Privilege;
+import org.apache.impala.authorization.PrivilegeRequestBuilder;
+import org.apache.impala.catalog.FeDb;
+import org.apache.impala.catalog.FeTable;
+import org.apache.impala.catalog.FeView;
+import org.apache.impala.catalog.Table;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.FileSystemUtil;
+import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.ImpalaRuntimeException;
+import org.apache.impala.common.JniUtil;
+import org.apache.impala.common.Pair;
+import org.apache.impala.service.BackendConfig;
+import org.apache.impala.thrift.TTestCaseData;
+import org.apache.impala.util.CompressionUtil;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+
+/**
+ * Represents a COPY TESTCASE [TO|FROM] <URI> [QUERY STMT] statement. It is used both
+ * for creating a testcase file for a given query statement and for loading an already
+ * created testcase file.
+ */
+public class CopyTestCaseStmt extends StatementBase {
+
+  // File name prefix of the testcase file for a given query statement.
+  private static final String TEST_OUTPUT_FILE_PREFIX = "impala-testcase-data-";
+
+  private static final Logger LOG = Logger.getLogger(CopyTestCaseStmt.class);
+
+  // QueryStmt for which the testcase should be created. Set to null if we are loading
+  // an existing testcase.
+  private final QueryStmt queryStmt_;
+  // Corresponds to:
+  //  - HDFS output dir that should contain the testcase output file (or)
+  //  - Full HDFS path for a given input testcase file while loading it.
+  private final HdfsUri hdfsPath_;
+
+  private CopyTestCaseStmt(QueryStmt stmt, HdfsUri path) {
+    queryStmt_ = stmt;
+    hdfsPath_ = path;
+  }
+
+  // Convenience c'tors for creating/loading a testcase.
+  public static CopyTestCaseStmt to(QueryStmt stmt, HdfsUri path) {
+    return new CopyTestCaseStmt(stmt, path);
+  }
+
+  public static CopyTestCaseStmt from(HdfsUri path) {
+    return new CopyTestCaseStmt(null, path);
+  }
+
+  /**
+   * @return True if this stmt corresponds to a testcase export for a given query.
+   * False otherwise.
+   */
+  public boolean isTestCaseExport() { return queryStmt_ != null; }
+
+  public QueryStmt getQueryStmt() { return queryStmt_; }
+
+  public String getHdfsPath() { return hdfsPath_.getLocation(); }
+
+  @Override
+  public void collectTableRefs(List<TableRef> referencedTables) {
+    if (!isTestCaseExport()) return;
+    queryStmt_.collectTableRefs(referencedTables);
+  }
+
+  @Override
+  public void analyze(Analyzer analyzer) throws AnalysisException {
+    super.analyze(analyzer);
+    if (isTestCaseExport()) {
+      hdfsPath_.analyze(analyzer, Privilege.ALL, FsAction.READ_WRITE,
+          /*registerPrivReq*/ true, /*pathMustExist*/ true);
+      try {
+        if (!FileSystemUtil.isDir(hdfsPath_.getPath())) {
+          throw new AnalysisException(String.format("Path is not a valid directory to " +
+                "write the testcase output file: %s", hdfsPath_));
+        }
+      } catch (IOException e) {
+        throw new AnalysisException(
+            String.format("Error checking the status of path: %s", hdfsPath_), e);
+      }
+      queryStmt_.analyze(analyzer);
+      // Requires VIEW_METADATA privilege to dump the metadata state of all the
+      // referenced objects.
+      Pair<Set<FeDb>, Set<FeTable>> referencedObjects = getReferencedCatalogObjects();
+      for (FeDb db: referencedObjects.first) {
+        analyzer.registerPrivReq(new PrivilegeRequestBuilder().onDb(db.getName()).allOf
+            (Privilege.VIEW_METADATA).toRequest());
+      }
+      for (FeTable table: referencedObjects.second) {
+        analyzer.registerPrivReq(new PrivilegeRequestBuilder().onTable(table.getDb()
+            .getName(), table.getName()).allOf(Privilege.VIEW_METADATA).toRequest());
+      }
+    } else {
+      hdfsPath_.analyze(analyzer, Privilege.ALL, FsAction.READ, /*registerPrivReq*/ true,
+          /*pathMustExist*/ true);
+    }
+  }
+
+  /**
+   * Helper method that returns all the base tables, view and databases referenced in the
+   * queryStmt_. Omits query local views.
+   */
+  private Pair<Set<FeDb>, Set<FeTable>> getReferencedCatalogObjects() {
+    Preconditions.checkState(queryStmt_.isAnalyzed());
+    Set<FeTable> referencedTblsAndViews = Sets.newIdentityHashSet();
+    Set<FeDb> referencedDbs = Sets.newIdentityHashSet();
+    for (TableRef ref: queryStmt_.collectTableRefs()) {
+      referencedDbs.add(ref.getTable().getDb());
+      referencedTblsAndViews.add(ref.getTable());
+    }
+    for (FeView view: queryStmt_.collectInlineViews()) {
+      if (view == null || view.isLocalView()) continue;
+      referencedDbs.add(view.getDb());
+      referencedTblsAndViews.add(view);
+    }
+    return new Pair<>(referencedDbs, referencedTblsAndViews);
+  }
+
+  /**
+   * Walks through the analyzed queryStmt_ tree to identify all the referenced tables,
+   * views and databases which are then serialized into the TTestCaseData output val.
+   */
+  @VisibleForTesting
+  public TTestCaseData getTestCaseData() {
+    Preconditions.checkState(queryStmt_.isAnalyzed());
+    TTestCaseData result = new TTestCaseData(queryStmt_.getOrigSqlString(),
+        hdfsPath_.getLocation(), BackendConfig.INSTANCE.getImpalaBuildVersion());
+    Pair<Set<FeDb>, Set<FeTable>> referencedObjects = getReferencedCatalogObjects();
+    // Sort the referenced objects for deterministic testcase outputs.
+    List<FeDb> referencedDbs = new ArrayList<>(referencedObjects.first);
+    List<FeTable> referencedTbls = new ArrayList<>(referencedObjects.second);
+    Collections.sort(referencedDbs, FeDb.NAME_COMPARATOR);
+    Collections.sort(referencedTbls, FeTable.NAME_COMPARATOR);
+    for (FeDb db: referencedDbs) {
+      result.addToDbs(db.toThrift());
+    }
+    for (FeTable table: referencedTbls) {
+      Preconditions.checkState(table instanceof Table);
+      ((Table) table).getLock().lock();
+      try {
+        result.addToTables_and_views(((Table) table).toThrift());
+      } finally {
+        ((Table) table).getLock().unlock();
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Builds the testcase data for the input queryStmt_ and writes it to a file in the
+   * hdfsPath_ directory. Randomly generates the output filename and returns the fully
+   * qualified path.
+   */
+  public String writeTestCaseData() throws ImpalaException {
+    TTestCaseData data = getTestCaseData();
+    Path filePath = new Path(
+        hdfsPath_.getPath(), TEST_OUTPUT_FILE_PREFIX + UUID.randomUUID().toString());
+    try {
+      FileSystem fs = FileSystemUtil.getDefaultFileSystem();
+      FSDataOutputStream os = fs.create(filePath);
+      try {
+        os.write(CompressionUtil.deflateCompress(JniUtil.serializeToThrift(data)));
+      } finally {
+        os.close();
+      }
+    } catch (IOException e) {
+      throw new ImpalaRuntimeException(String.format("Error writing test case output to" +
+          " file: %s", filePath), e);
+    }
+    LOG.info(String.format(
+        "Created testcase file %s for query: %s", filePath, data.getQuery_stmt()));
+    return filePath.toString();
+  }
+}
diff --git a/fe/src/main/java/org/apache/impala/analysis/HdfsUri.java b/fe/src/main/java/org/apache/impala/analysis/HdfsUri.java
index 693c211..d74545c 100644
--- a/fe/src/main/java/org/apache/impala/analysis/HdfsUri.java
+++ b/fe/src/main/java/org/apache/impala/analysis/HdfsUri.java
@@ -52,17 +52,20 @@ public class HdfsUri {
 
   public void analyze(Analyzer analyzer, Privilege privilege)
       throws AnalysisException {
-    analyze(analyzer, privilege, FsAction.NONE, true);
+    analyze(analyzer, privilege, FsAction.NONE, /*registerPrivReq*/ true,
+        /*pathMustExist*/ false);
   }
 
   public void analyze(Analyzer analyzer, Privilege privilege, FsAction perm)
       throws AnalysisException {
-    analyze(analyzer, privilege, perm, true);
+    analyze(analyzer, privilege, perm, /*registerPrivReq*/ true,
+        /*pathMustExist*/ false);
   }
 
   public void analyze(Analyzer analyzer, Privilege privilege, boolean registerPrivReq)
       throws AnalysisException {
-    analyze(analyzer, privilege, FsAction.NONE, registerPrivReq);
+    analyze(
+        analyzer, privilege, FsAction.NONE, registerPrivReq, /*pathMustExist*/ false);
   }
 
   /**
@@ -70,9 +73,10 @@ public class HdfsUri {
    * Optionally check location path permission, issue warning if impala user doesn't
    * have sufficient access rights.
    * Optionally register a privilege request. Used by GRANT/REVOKE privilege statements.
+   * If pathMustExist is true and the path does not exist, AnalysisException is thrown.
    */
   public void analyze(Analyzer analyzer, Privilege privilege, FsAction perm,
-      boolean registerPrivReq) throws AnalysisException {
+      boolean registerPrivReq, boolean pathMustExist) throws AnalysisException {
     if (location_.isEmpty()) {
       throw new AnalysisException("URI path cannot be empty.");
     }
@@ -88,15 +92,20 @@ public class HdfsUri {
     Path parentPath = uriPath_.getParent();
     try {
       FileSystem fs = uriPath_.getFileSystem(FileSystemUtil.getConfiguration());
-      boolean pathExists = false;
+      if (pathMustExist && !fs.exists(uriPath_)) {
+        throw new AnalysisException(String.format("Path does not exist: %s", uriPath_));
+      }
+      boolean parentPathExists = false;
       StringBuilder errorMsg = new StringBuilder();
       try {
-        pathExists = fs.exists(parentPath);
-        if (!pathExists) errorMsg.append("Path does not exist.");
+        parentPathExists = fs.exists(parentPath);
+        if (!parentPathExists) {
+          errorMsg.append("Path does not exist.");
+        }
       } catch (Exception e) {
         errorMsg.append(e.getMessage());
       }
-      if (!pathExists) {
+      if (!parentPathExists) {
         analyzer.addWarning(String.format("Path '%s' cannot be reached: %s",
             parentPath, errorMsg.toString()));
       } else if (perm != FsAction.NONE) {
diff --git a/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java b/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java
index f8cc7dc..f792e40 100644
--- a/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java
@@ -22,6 +22,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
+import com.google.common.collect.Sets;
 import org.apache.impala.catalog.FeView;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.catalog.View;
@@ -120,6 +121,12 @@ public abstract class QueryStmt extends StatementBase {
     collectTableRefs(tblRefs, false);
   }
 
+  public List<TableRef> collectTableRefs() {
+    List<TableRef> tableRefs = Lists.newArrayList();
+    collectTableRefs(tableRefs);
+    return tableRefs;
+  }
+
   /**
    * Helper for collectFromClauseTableRefs() and collectTableRefs().
    * If 'fromClauseOnly' is true only collects table references in the FROM clause,
@@ -133,6 +140,12 @@ public abstract class QueryStmt extends StatementBase {
     }
   }
 
+  public List<FeView> collectInlineViews() {
+    Set<FeView> inlineViews = Sets.newHashSet();
+    collectInlineViews(inlineViews);
+    return new ArrayList<>(inlineViews);
+  }
+
   /**
   * Returns all inline view references in this statement.
   */
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index b517e3f..4cd1db0 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -177,20 +177,6 @@ public class SelectStmt extends QueryStmt {
   }
 
   /**
-   * @return the QueryStmt present in the whereClause_ if present, null otherwise.
-   */
-  private QueryStmt getWhereSubQueryStmt() {
-    QueryStmt whereQueryStmt = null;
-    if (whereClause_ != null) {
-      Subquery whereSubquery = whereClause_.getSubquery();
-      if (whereSubquery != null) {
-        whereQueryStmt = whereSubquery.getStatement();
-      }
-    }
-    return whereQueryStmt;
-  }
-
-  /**
    * Creates resultExprs and baseTblResultExprs.
    */
   @Override
@@ -1179,9 +1165,18 @@ public class SelectStmt extends QueryStmt {
         inlineViewRef.getViewStmt().collectInlineViews(inlineViews);
       }
     }
-    QueryStmt whereStmt = getWhereSubQueryStmt();
-    if (whereStmt != null) {
-      whereStmt.collectInlineViews(inlineViews);
+    if (whereClause_ != null) {
+      for (Expr conjunct : whereClause_.getConjuncts()) {
+        List<Subquery> whereSubQueries = Lists.newArrayList();
+        conjunct.collect(Predicates.instanceOf(Subquery.class), whereSubQueries);
+        if (whereSubQueries.size() == 0) continue;
+        // Check that multiple subqueries do not exist in the same expression. This
+        // should have been already caught by the analysis passes.
+        Preconditions.checkState(whereSubQueries.size() == 1, "Invariant " +
+            "violated: Multiple subqueries found in a single expression: " +
+            conjunct.toSql());
+        whereSubQueries.get(0).getStatement().collectInlineViews(inlineViews);
+      }
     }
   }
 
diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
index 2965456..850b629 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
@@ -65,6 +65,8 @@ public class StmtRewriter {
       queryStmt = ((UpdateStmt) analysisResult.getStmt()).getQueryStmt();
     } else if (analysisResult.isDeleteStmt()) {
       queryStmt = ((DeleteStmt) analysisResult.getStmt()).getQueryStmt();
+    } else if (analysisResult.isTestCaseStmt()) {
+      queryStmt = ((CopyTestCaseStmt) analysisResult.getStmt()).getQueryStmt();
     } else {
       throw new AnalysisException("Unsupported statement: " + stmt.toSql());
     }
diff --git a/fe/src/main/java/org/apache/impala/catalog/Catalog.java b/fe/src/main/java/org/apache/impala/catalog/Catalog.java
index 7a3aa93..3145686 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Catalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Catalog.java
@@ -63,8 +63,7 @@ public abstract class Catalog implements AutoCloseable {
   public static final TUniqueId INITIAL_CATALOG_SERVICE_ID = new TUniqueId(0L, 0L);
   public static final String DEFAULT_DB = "default";
 
-  protected final MetaStoreClientPool metaStoreClientPool_ =
-      new MetaStoreClientPool(0, 0);
+  private final MetaStoreClientPool metaStoreClientPool_;
 
  // Cache of authorization policy metadata. Populated from data retrieved from the
   // Sentry Service, if configured.
@@ -84,29 +83,28 @@ public abstract class Catalog implements AutoCloseable {
   protected final CatalogObjectCache<HdfsCachePool> hdfsCachePools_ =
       new CatalogObjectCache<HdfsCachePool>(false);
 
-  public Catalog() {
+  /**
+   * Creates a new instance of Catalog backed by a given MetaStoreClientPool.
+   */
+  public Catalog(MetaStoreClientPool metaStoreClientPool) {
     dataSources_ = new CatalogObjectCache<DataSource>();
+    metaStoreClientPool_ = Preconditions.checkNotNull(metaStoreClientPool);
   }
 
   /**
-   * Creates a new instance of Catalog. It also adds 'numClients' clients to
-   * 'metastoreClientPool_'.
-   * 'initialCnxnTimeoutSec' specifies the time (in seconds) Catalog will wait to
-   * establish an initial connection to the HMS. Using this setting allows catalogd and
-   * HMS to be started simultaneously.
+   * Creates a Catalog instance with the default MetaStoreClientPool implementation.
+   * Refer to MetaStoreClientPool class for more details.
    */
-  public Catalog(int numClients, int initialCnxnTimeoutSec) {
-    this();
-    metaStoreClientPool_.initClients(numClients, initialCnxnTimeoutSec);
+  public Catalog() {
+    this(new MetaStoreClientPool(0, 0));
   }
 
   /**
    * Adds a new database to the catalog, replacing any existing database with the same
-   * name. Returns the previous database with this name, or null if there was no
-   * previous database.
+   * name.
    */
-  public Db addDb(Db db) {
-    return dbCache_.get().put(db.getName().toLowerCase(), db);
+  public void addDb(Db db) {
+    dbCache_.get().put(db.getName().toLowerCase(), db);
   }
 
   /**
diff --git a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
index d6d375f..274ca35 100644
--- a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
@@ -253,17 +253,18 @@ public class CatalogServiceCatalog extends Catalog {
   private final Semaphore partialObjectFetchAccess_ =
       new Semaphore(MAX_PARALLEL_PARTIAL_FETCH_RPC_COUNT, /*fair =*/ true);
 
-  /**
-   * Initialize the CatalogServiceCatalog. If 'loadInBackground' is true, table metadata
-   * will be loaded in the background. 'initialHmsCnxnTimeoutSec' specifies the time (in
-   * seconds) CatalogServiceCatalog will wait to establish an initial connection to the
-   * HMS before giving up. Using this setting allows catalogd and HMS to be started
-   * simultaneously.
-   */
+  /**
+   * Initialize the CatalogServiceCatalog using a given MetaStoreClientPool impl.
+   * @param loadInBackground If true, table metadata will be loaded in the background.
+   * @param numLoadingThreads Number of threads used to load table metadata.
+   * @param metaStoreClientPool A pool of HMS clients backing this Catalog.
+   * @throws ImpalaException
+   */
   public CatalogServiceCatalog(boolean loadInBackground, int numLoadingThreads,
-      int initialHmsCnxnTimeoutSec, SentryConfig sentryConfig, TUniqueId catalogServiceId,
-      String kerberosPrincipal, String localLibraryPath) throws ImpalaException {
-    super(INITIAL_META_STORE_CLIENT_POOL_SIZE, initialHmsCnxnTimeoutSec);
+      SentryConfig sentryConfig, TUniqueId catalogServiceId, String kerberosPrincipal,
+      String localLibraryPath, MetaStoreClientPool metaStoreClientPool)
+      throws ImpalaException {
+    super(metaStoreClientPool);
     catalogServiceId_ = catalogServiceId;
     tableLoadingMgr_ = new TableLoadingMgr(this, numLoadingThreads);
     loadInBackground_ = loadInBackground;
@@ -323,6 +324,20 @@ public class CatalogServiceCatalog extends Catalog {
     }
   }
 
+  /**
+   * Initializes the Catalog using the default MetaStoreClientPool impl.
+   * @param initialHmsCnxnTimeoutSec Time (in seconds) CatalogServiceCatalog will wait
+   * to establish an initial connection to the HMS before giving up.
+   */
+  public CatalogServiceCatalog(boolean loadInBackground, int numLoadingThreads,
+      int initialHmsCnxnTimeoutSec, SentryConfig sentryConfig, TUniqueId catalogServiceId,
+      String kerberosPrincipal, String localLibraryPath) throws ImpalaException {
+    this(loadInBackground, numLoadingThreads, sentryConfig, catalogServiceId,
+        kerberosPrincipal, localLibraryPath, new MetaStoreClientPool(
+        INITIAL_META_STORE_CLIENT_POOL_SIZE, initialHmsCnxnTimeoutSec));
+  }
+
   // Timeout for acquiring a table lock
   // TODO: Make this configurable
   private static final long TBL_LOCK_TIMEOUT_MS = 7200000;
@@ -1393,6 +1408,19 @@ public class CatalogServiceCatalog extends Catalog {
   }
 
   /**
+   * Adds a table 'table' to the database 'db' and returns the table that was added.
+   */
+  public Table addTable(Db db, Table table) {
+    versionLock_.writeLock().lock();
+    try {
+      Preconditions.checkNotNull(db).addTable(Preconditions.checkNotNull(table));
+    } finally {
+      versionLock_.writeLock().unlock();
+    }
+    return table;
+  }
+
+  /**
    * Gets the table with the given name, loading it if needed (if the existing catalog
    * object is not yet loaded). Returns the matching Table or null if no table with this
    * name exists in the catalog.
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeDb.java b/fe/src/main/java/org/apache/impala/catalog/FeDb.java
index 057a2fa..11e5aa1 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeDb.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeDb.java
@@ -16,6 +16,7 @@
 // under the License.
 package org.apache.impala.catalog;
 
+import java.util.Comparator;
 import java.util.List;
 
 import org.apache.hadoop.hive.metastore.api.Database;
@@ -30,6 +31,14 @@ import org.apache.impala.util.PatternMatcher;
  * Frontend interface for interacting with a database.
  */
 public interface FeDb extends HasName {
+
+  Comparator<FeDb> NAME_COMPARATOR = new Comparator<FeDb>() {
+    @Override
+    public int compare(FeDb db1, FeDb db2) {
+      return db1.getName().compareTo(db2.getName());
+    }
+  };
+
   /**
    * @return the name of the database
    */
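
As a usage sketch (not part of the patch), the comparator gives a deterministic,
name-ordered view of databases; the analogous FeTable.NAME_COMPARATOR added below
works the same way on fully qualified table names:

    import java.util.List;
    import org.apache.impala.catalog.FeDb;

    class NameOrderSketch {
      // Sorts a catalog's databases in place by name; useful wherever a stable
      // listing is needed (e.g. when emitting testcase metadata).
      static List<FeDb> sortByName(List<FeDb> dbs) {
        dbs.sort(FeDb.NAME_COMPARATOR);
        return dbs;
      }
    }
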
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeTable.java b/fe/src/main/java/org/apache/impala/catalog/FeTable.java
index d395d48..4c4c37d 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeTable.java
@@ -16,6 +16,7 @@
 // under the License.
 package org.apache.impala.catalog;
 
+import java.util.Comparator;
 import java.util.List;
 import java.util.Set;
 
@@ -29,6 +30,14 @@ import org.apache.impala.thrift.TTableStats;
  * Frontend interface for interacting with a table.
  */
 public interface FeTable {
+
+  Comparator<FeTable> NAME_COMPARATOR = new Comparator<FeTable>() {
+    @Override
+    public int compare(FeTable t1, FeTable t2) {
+      return t1.getFullName().compareTo(t2.getFullName());
+    }
+  };
+
   /** @see CatalogObject#isLoaded() */
   boolean isLoaded();
 
diff --git a/fe/src/main/java/org/apache/impala/catalog/MetaStoreClientPool.java b/fe/src/main/java/org/apache/impala/catalog/MetaStoreClientPool.java
index 93f8585..8c4454f 100644
--- a/fe/src/main/java/org/apache/impala/catalog/MetaStoreClientPool.java
+++ b/fe/src/main/java/org/apache/impala/catalog/MetaStoreClientPool.java
@@ -35,6 +35,12 @@ import com.google.common.base.Preconditions;
  * Manages a pool of RetryingMetaStoreClient connections. If the connection pool is empty
  * a new client is created and added to the pool. The idle pool can expand till a maximum
  * size of MAX_HMS_CONNECTION_POOL_SIZE, beyond which the connections are closed.
+ *
+ * This default implementation reads the Hive metastore configuration from the HiveConf
+ * object passed in the constructor. If you need a temporary HMS instance created from
+ * scratch for unit tests, refer to the EmbeddedMetastoreClientPool class, which mocks
+ * an actual HMS by creating a temporary Derby-backed database on the fly. It should
+ * not be used for production Catalog server instances.
  */
 public class MetaStoreClientPool {
   // Key for config option read from hive-site.xml
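
A minimal sketch of the two ways a pool can now be wired up, assuming only the
constructors shown in this change (the temp-directory handling is illustrative,
not part of the patch):

    import java.io.IOException;
    import java.nio.file.Files;
    import org.apache.impala.catalog.MetaStoreClientPool;
    import org.apache.impala.testutil.EmbeddedMetastoreClientPool;

    class PoolWiringSketch {
      static MetaStoreClientPool makePool(boolean embedded) throws IOException {
        if (embedded) {
          // Embedded HMS backed by a throwaway local Derby database; test-only.
          return new EmbeddedMetastoreClientPool(0, Files.createTempDirectory("hms"));
        }
        // Default pool: HMS settings are read via HiveConf (hive-site.xml).
        return new MetaStoreClientPool(0, 0);
      }
    }

Either pool is then handed to the Catalog/CatalogServiceCatalog constructors
introduced above.
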
diff --git a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
index 21e30ac..1de2744 100644
--- a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
+++ b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
@@ -17,6 +17,7 @@
 
 package org.apache.impala.common;
 
+import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
@@ -393,6 +394,13 @@ public class FileSystemUtil {
     return fs;
   }
 
+  /**
+   * Returns the FileSystem object for a given path using the cached config.
+   */
+  public static FileSystem getFileSystemForPath(Path p) throws IOException {
+    return p.getFileSystem(CONF);
+  }
+
   public static DistributedFileSystem getDistributedFileSystem() throws IOException {
     FileSystem fs = getDefaultFileSystem();
     Preconditions.checkState(fs instanceof DistributedFileSystem);
@@ -511,4 +519,12 @@ public class FileSystemUtil {
       return null;
     }
   }
+
+  /**
+   * Returns true if the path 'p' is a directory, false otherwise.
+   */
+  public static boolean isDir(Path p) throws IOException {
+    FileSystem fs = getFileSystemForPath(p);
+    return fs.isDirectory(p);
+  }
 }
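
A short usage sketch for the two helpers added above (the isUsableDir wrapper is
hypothetical):

    import java.io.IOException;
    import org.apache.hadoop.fs.Path;
    import org.apache.impala.common.FileSystemUtil;

    class FsCheckSketch {
      // Returns true iff 'location' resolves to an existing directory on the
      // filesystem that owns the path, using the cached Hadoop config.
      static boolean isUsableDir(String location) throws IOException {
        Path p = new Path(location);
        return FileSystemUtil.getFileSystemForPath(p).exists(p)
            && FileSystemUtil.isDir(p);
      }
    }
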
diff --git a/fe/src/main/java/org/apache/impala/common/JniUtil.java b/fe/src/main/java/org/apache/impala/common/JniUtil.java
index edfcd8c..3e1e7df 100644
--- a/fe/src/main/java/org/apache/impala/common/JniUtil.java
+++ b/fe/src/main/java/org/apache/impala/common/JniUtil.java
@@ -94,6 +94,19 @@ public class JniUtil {
   }
 
   /**
+   * Serializes input into a byte[] using the default protocol factory.
+   */
+  public static <T extends TBase<?, ?>>
+  byte[] serializeToThrift(T input) throws ImpalaException {
+    TSerializer serializer = new TSerializer(protocolFactory_);
+    try {
+      return serializer.serialize(input);
+    } catch (TException e) {
+      throw new InternalException(e.getMessage());
+    }
+  }
+
+  /**
    * Serializes input into a byte[] using a given protocol factory.
    */
   public static <T extends TBase<?, ?>, F extends TProtocolFactory>
@@ -105,6 +118,12 @@ public class JniUtil {
       throw new InternalException(e.getMessage());
     }
   }
+
+  public static <T extends TBase<?, ?>>
+  void deserializeThrift(T result, byte[] thriftData) throws ImpalaException {
+    deserializeThrift(protocolFactory_, result, thriftData);
+  }
+
   /**
    * Deserialize a serialized form of a Thrift data structure to its object form.
    */
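
A round-trip sketch of the two new default-protocol helpers, using the
TTestCaseData struct they are introduced for:

    import org.apache.impala.common.ImpalaException;
    import org.apache.impala.common.JniUtil;
    import org.apache.impala.thrift.TTestCaseData;

    class ThriftRoundTripSketch {
      // Serializes with the default protocol factory and deserializes back;
      // mirrors what the testcase writer and loader do on either end.
      static TTestCaseData roundTrip(TTestCaseData in) throws ImpalaException {
        byte[] bytes = JniUtil.serializeToThrift(in);
        TTestCaseData out = new TTestCaseData();
        JniUtil.deserializeThrift(out, bytes);
        return out;
      }
    }
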
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index cc31ce4..cbbfc43 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -26,6 +26,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import com.google.common.collect.Sets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.impala.analysis.AggregateInfo;
@@ -1108,6 +1109,11 @@ public class HdfsScanNode extends ScanNode {
    * ultimately determined by the scheduling done by the backend's Scheduler).
    * Assume that scan ranges that can be scheduled locally will be, and that scan
    * ranges that cannot will be round-robined across the cluster.
+   *
+   * When the planner runs in debug mode (SET PLANNER_TESTCASE_MODE=true), the
+   * estimate does not take the local cluster topology into account and instead
+   * assumes that every scan range location is local to some datanode. This mode
+   * should only be enabled when replaying a testcase from some other cluster.
    */
   protected void computeNumNodes(Analyzer analyzer, long cardinality) {
     Preconditions.checkNotNull(scanRangeSpecs_);
@@ -1117,42 +1123,62 @@ public class HdfsScanNode extends ScanNode {
     int numLocalRanges = 0;
     int numRemoteRanges = 0;
     if (scanRangeSpecs_.isSetConcrete_ranges()) {
-      for (TScanRangeLocationList range : scanRangeSpecs_.concrete_ranges) {
-        boolean anyLocal = false;
-        if (range.isSetLocations()) {
-          for (TScanRangeLocation loc : range.locations) {
-            TNetworkAddress dataNode =
-                analyzer.getHostIndex().getEntry(loc.getHost_idx());
-            if (cluster.contains(dataNode)) {
-              anyLocal = true;
-              // Use the full datanode address (including port) to account for the test
-              // minicluster where there are multiple datanodes and impalads on a single
-              // host.  This assumes that when an impalad is colocated with a datanode,
-              // there are the same number of impalads as datanodes on this host in this
-              // cluster.
-              localHostSet.add(dataNode);
-            }
+      if (analyzer.getQueryOptions().planner_testcase_mode) {
+        // TODO: Have a separate scan node implementation that mocks an HDFS scan
+        // node rather than including the logic here.
+
+        // Track the number of unique host indexes across all scan ranges. Assume for
+        // the sake of simplicity that every scan is served from a local datanode.
+        Set<Integer> dummyHostIndex = Sets.newHashSet();
+        for (TScanRangeLocationList range : scanRangeSpecs_.concrete_ranges) {
+          for (TScanRangeLocation loc : range.locations) {
+            dummyHostIndex.add(loc.getHost_idx());
+            ++numLocalRanges;
           }
         }
-        // This range has at least one replica with a colocated impalad, so assume it
-        // will be scheduled on one of those nodes.
-        if (anyLocal) {
-          ++numLocalRanges;
-        } else {
-          ++numRemoteRanges;
+        totalNodes = Math.min(
+            scanRangeSpecs_.concrete_ranges.size(), dummyHostIndex.size());
+        LOG.info(String.format("Planner running in DEBUG mode. ScanNode: %s, " +
+            "TotalNodes %d, Local Ranges %d", tbl_.getFullName(), totalNodes,
+            numLocalRanges));
+      } else {
+        for (TScanRangeLocationList range : scanRangeSpecs_.concrete_ranges) {
+          boolean anyLocal = false;
+          if (range.isSetLocations()) {
+            for (TScanRangeLocation loc : range.locations) {
+              TNetworkAddress dataNode =
+                  analyzer.getHostIndex().getEntry(loc.getHost_idx());
+              if (cluster.contains(dataNode)) {
+                anyLocal = true;
+                // Use the full datanode address (including port) to account for the test
+                // minicluster where there are multiple datanodes and impalads on a single
+                // host.  This assumes that when an impalad is colocated with a datanode,
+                // there are the same number of impalads as datanodes on this host in this
+                // cluster.
+                localHostSet.add(dataNode);
+              }
+            }
+          }
+          // This range has at least one replica with a colocated impalad, so assume it
+          // will be scheduled on one of those nodes.
+          if (anyLocal) {
+            ++numLocalRanges;
+          } else {
+            ++numRemoteRanges;
+          }
+          // Approximate the number of nodes that will execute locally assigned ranges to
+          // be the smaller of the number of locally assigned ranges and the number of
+          // hosts that hold block replica for those ranges.
+          int numLocalNodes = Math.min(numLocalRanges, localHostSet.size());
+          // The remote ranges are round-robined across all the impalads.
+          int numRemoteNodes = Math.min(numRemoteRanges, cluster.numExecutors());
+          // The local and remote assignments may overlap, but we don't know by how much
+          // so conservatively assume no overlap.
+          totalNodes = Math.min(numLocalNodes + numRemoteNodes, cluster.numExecutors());
+          // Exit early if all hosts have a scan range assignment, to avoid extraneous
+          // work in case the number of scan ranges dominates the number of nodes.
+          if (totalNodes == cluster.numExecutors()) break;
         }
-        // Approximate the number of nodes that will execute locally assigned ranges to
-        // be the smaller of the number of locally assigned ranges and the number of
-        // hosts that hold block replica for those ranges.
-        int numLocalNodes = Math.min(numLocalRanges, localHostSet.size());
-        // The remote ranges are round-robined across all the impalads.
-        int numRemoteNodes = Math.min(numRemoteRanges, cluster.numExecutors());
-        // The local and remote assignments may overlap, but we don't know by how much so
-        // conservatively assume no overlap.
-        totalNodes = Math.min(numLocalNodes + numRemoteNodes, cluster.numExecutors());
-        // Exit early if all hosts have a scan range assignment, to avoid extraneous work
-        // in case the number of scan ranges dominates the number of nodes.
-        if (totalNodes == cluster.numExecutors()) break;
       }
     }
     // Handle the generated range specifications.
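
To make the estimate concrete, a small worked example with made-up numbers:
8 locally assigned ranges spread over 3 replica hosts, plus 5 remote ranges on a
4-executor cluster, yields

    int numLocalNodes = Math.min(8, 3);                 // 3
    int numRemoteNodes = Math.min(5, 4);                // 4
    int totalNodes = Math.min(3 + 4, 4 /*executors*/);  // capped at 4
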
diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java b/fe/src/main/java/org/apache/impala/planner/Planner.java
index 56dec14..46371e6 100644
--- a/fe/src/main/java/org/apache/impala/planner/Planner.java
+++ b/fe/src/main/java/org/apache/impala/planner/Planner.java
@@ -279,6 +279,12 @@ public class Planner {
           PrintUtils.printBytesRoundedToMb(request.getPer_host_mem_estimate())));
       hasHeader = true;
     }
+    // Warn if the planner is running in DEBUG mode.
+    if (request.query_ctx.client_request.query_options.planner_testcase_mode) {
+      str.append("WARNING: The planner is running in TESTCASE mode. This should only be "
+          + "used by developers for debugging.\nTo disable it, do SET " +
+          "PLANNER_TESTCASE_MODE=false.\n");
+    }
     if (request.query_ctx.disable_codegen_hint) {
       str.append("Codegen disabled by planner\n");
     }
diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
index a4a7b72..6d92364 100644
--- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java
+++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
@@ -63,6 +63,7 @@ public class BackendConfig {
   }
   public int getKuduClientTimeoutMs() { return backendCfg_.kudu_operation_timeout_ms; }
 
+  public String getImpalaBuildVersion() { return backendCfg_.impala_build_version; }
   public int getImpalaLogLevel() { return backendCfg_.impala_log_lvl; }
   public int getNonImpalaJavaVlogLevel() { return backendCfg_.non_impala_java_vlog; }
   public long getSentryCatalogPollingFrequency() {
diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index 90369bb..137e682 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -17,6 +17,8 @@
 
 package org.apache.impala.service;
 
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -27,6 +29,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -45,6 +49,7 @@ import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.PrincipalType;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.io.IOUtils;
 import org.apache.impala.analysis.AlterTableSortByStmt;
 import org.apache.impala.analysis.FunctionName;
 import org.apache.impala.analysis.TableName;
@@ -84,6 +89,7 @@ import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.ImpalaRuntimeException;
 import org.apache.impala.common.InternalException;
+import org.apache.impala.common.JniUtil;
 import org.apache.impala.common.Pair;
 import org.apache.impala.common.Reference;
 import org.apache.impala.compat.MetastoreShim;
@@ -122,6 +128,7 @@ import org.apache.impala.thrift.TCreateFunctionParams;
 import org.apache.impala.thrift.TCreateOrAlterViewParams;
 import org.apache.impala.thrift.TCreateTableLikeParams;
 import org.apache.impala.thrift.TCreateTableParams;
+import org.apache.impala.thrift.TDatabase;
 import org.apache.impala.thrift.TDdlExecRequest;
 import org.apache.impala.thrift.TDdlExecResponse;
 import org.apache.impala.thrift.TDropDataSourceParams;
@@ -135,6 +142,7 @@ import org.apache.impala.thrift.TGrantRevokePrivParams;
 import org.apache.impala.thrift.TGrantRevokeRoleParams;
 import org.apache.impala.thrift.THdfsCachingOp;
 import org.apache.impala.thrift.THdfsFileFormat;
+import org.apache.impala.thrift.TCopyTestCaseReq;
 import org.apache.impala.thrift.TPartitionDef;
 import org.apache.impala.thrift.TPartitionKeyValue;
 import org.apache.impala.thrift.TPartitionStats;
@@ -153,9 +161,11 @@ import org.apache.impala.thrift.TTable;
 import org.apache.impala.thrift.TTableName;
 import org.apache.impala.thrift.TTableRowFormat;
 import org.apache.impala.thrift.TTableStats;
+import org.apache.impala.thrift.TTestCaseData;
 import org.apache.impala.thrift.TTruncateParams;
 import org.apache.impala.thrift.TUpdateCatalogRequest;
 import org.apache.impala.thrift.TUpdateCatalogResponse;
+import org.apache.impala.util.CompressionUtil;
 import org.apache.impala.util.FunctionUtils;
 import org.apache.impala.util.HdfsCachingUtil;
 import org.apache.impala.util.MetaStoreUtil;
@@ -163,6 +173,7 @@ import org.apache.log4j.Logger;
 import org.apache.thrift.TException;
 
 import com.codahale.metrics.Timer;
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -341,6 +352,9 @@ public class CatalogOpExecutor {
       case ALTER_DATABASE:
         alterDatabase(ddlRequest.getAlter_db_params(), response);
         break;
+      case COPY_TESTCASE:
+        copyTestCaseData(ddlRequest.getCopy_test_case_params(), response);
+        break;
       default: throw new IllegalStateException("Unexpected DDL exec request type: " +
           ddlRequest.ddl_type);
     }
@@ -362,6 +376,95 @@ public class CatalogOpExecutor {
   }
 
   /**
+   * Loads the testcase metadata from the request into the catalog cache and returns
+   * the query statement this input testcase corresponds to. When loading the table and
+   * database objects, this method overwrites any existing tables or databases with the
+   * same name. However, these overwrites are *not* persistent. The old table/db
+   * states can be recovered by blowing away the cache using INVALIDATE METADATA.
+   */
+  @VisibleForTesting
+  public String copyTestCaseData(
+      TCopyTestCaseReq request, TDdlExecResponse response)
+      throws ImpalaException {
+    Path inputPath = new Path(Preconditions.checkNotNull(request.input_path));
+    // Read the data from the source FS.
+    FileSystem fs;
+    FSDataInputStream in;
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    try {
+      fs = FileSystemUtil.getFileSystemForPath(inputPath);
+      in = fs.open(inputPath);
+      IOUtils.copyBytes(in, out, fs.getConf(), /*close streams*/true);
+    } catch (IOException e) {
+      throw new ImpalaRuntimeException(String.format("Error reading test case data from" +
+          " path: %s", inputPath), e);
+    }
+    byte[] decompressedBytes = CompressionUtil.deflateDecompress(out.toByteArray());
+    TTestCaseData testCaseData = new TTestCaseData();
+    try {
+      JniUtil.deserializeThrift(testCaseData, decompressedBytes);
+    } catch (ImpalaException e) {
+      throw new CatalogException(String.format("Error deserializing the testcase data " +
+          "at path %s. File data may be corrupt or incompatible with the current version "
+          + "of Impala.", inputPath.toString()), e);
+    }
+
+    // Add the databases first, followed by the table and views information.
+    // Overwrites any existing Db/Table objects with name clashes. Since we overwrite
+    // the state in-memory and do not flush it to HMS, the older state can be recovered
+    // by loading everything back from HMS, e.g. via INVALIDATE METADATA.
+    int numDbsAdded = 0;
+    for (TDatabase thriftDb: testCaseData.getDbs()) {
+      Db db = Db.fromTDatabase(thriftDb);
+      // Set a new version to force an overwrite if a Db already exists with the same
+      // name.
+      db.setCatalogVersion(catalog_.incrementAndGetCatalogVersion());
+      Db ret = catalog_.addDb(db.getName(), db.getMetaStoreDb());
+      if (ret != null) {
+        ++numDbsAdded;
+        response.result.addToUpdated_catalog_objects(db.toTCatalogObject());
+      }
+    }
+
+    int numTblsAdded = 0;
+    int numViewsAdded = 0;
+    for (TTable tTable : testCaseData.tables_and_views) {
+      Db db = catalog_.getDb(tTable.db_name);
+      // Db should have been created by now.
+      Preconditions.checkNotNull(db, String.format("Missing db %s", tTable.db_name));
+      Table t = Table.fromThrift(db, tTable);
+      // Set a new version to force an overwrite if a table already exists with the same
+      // name.
+      t.setCatalogVersion(catalog_.incrementAndGetCatalogVersion());
+      catalog_.addTable(db, t);
+      if (t instanceof View) {
+        ++numViewsAdded;
+      } else {
+        ++numTblsAdded;
+      }
+      // The table lock is needed here since toTCatalogObject() calls Table#toThrift()
+      // which expects the current thread to hold this lock. For more details refer
+      // to IMPALA-4092.
+      t.getLock().lock();
+      try {
+        response.result.addToUpdated_catalog_objects(t.toTCatalogObject());
+      } finally {
+        t.getLock().unlock();
+      }
+    }
+    StringBuilder responseStr = new StringBuilder();
+    responseStr.append(String.format("Testcase generated using Impala version %s. ",
+        testCaseData.getImpala_version()));
+    responseStr.append(String.format(
+        "%d db(s), %d table(s) and %d view(s) imported for query: ", numDbsAdded,
+        numTblsAdded, numViewsAdded));
+    responseStr.append("\n\n").append(testCaseData.getQuery_stmt());
+    LOG.info(String.format("%s. Testcase path: %s", responseStr, inputPath));
+    addSummary(response, responseStr.toString());
+    return testCaseData.getQuery_stmt();
+  }
+
+  /**
    * Create result set from string 'summary', and attach it to 'response'.
    */
   private void addSummary(TDdlExecResponse response, String summary) {
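
Condensed, the read path of copyTestCaseData() above boils down to three steps:
fetch the bytes, inflate them, and thrift-deserialize into TTestCaseData (sketch
only; the patch's error handling is omitted):

    import java.io.ByteArrayOutputStream;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;
    import org.apache.impala.common.FileSystemUtil;
    import org.apache.impala.common.JniUtil;
    import org.apache.impala.thrift.TTestCaseData;
    import org.apache.impala.util.CompressionUtil;

    class TestCaseReadSketch {
      static TTestCaseData read(String path) throws Exception {
        Path p = new Path(path);
        FileSystem fs = FileSystemUtil.getFileSystemForPath(p);
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        FSDataInputStream in = fs.open(p);
        IOUtils.copyBytes(in, out, fs.getConf(), /*close streams*/ true);
        TTestCaseData data = new TTestCaseData();
        JniUtil.deserializeThrift(
            data, CompressionUtil.deflateDecompress(out.toByteArray()));
        return data;
      }
    }
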
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java
index 9fe1e96..170cbb9 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -18,7 +18,6 @@
 package org.apache.impala.service;
 
 import java.io.IOException;
-import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -39,6 +38,7 @@ import org.apache.impala.analysis.AlterDbStmt;
 import org.apache.impala.analysis.AnalysisContext;
 import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
 import org.apache.impala.analysis.CommentOnStmt;
+import org.apache.impala.analysis.CopyTestCaseStmt;
 import org.apache.impala.analysis.CreateDataSrcStmt;
 import org.apache.impala.analysis.CreateDropRoleStmt;
 import org.apache.impala.analysis.CreateUdaStmt;
@@ -53,14 +53,11 @@ import org.apache.impala.analysis.GrantRevokePrivStmt;
 import org.apache.impala.analysis.GrantRevokeRoleStmt;
 import org.apache.impala.analysis.InsertStmt;
 import org.apache.impala.analysis.Parser;
-import org.apache.impala.analysis.Parser.ParseException;
 import org.apache.impala.analysis.QueryStmt;
 import org.apache.impala.analysis.ResetMetadataStmt;
 import org.apache.impala.analysis.ShowFunctionsStmt;
 import org.apache.impala.analysis.ShowGrantPrincipalStmt;
 import org.apache.impala.analysis.ShowRolesStmt;
-import org.apache.impala.analysis.SqlParser;
-import org.apache.impala.analysis.SqlScanner;
 import org.apache.impala.analysis.StatementBase;
 import org.apache.impala.analysis.StmtMetadataLoader;
 import org.apache.impala.analysis.StmtMetadataLoader.StmtTableCache;
@@ -92,7 +89,6 @@ import org.apache.impala.catalog.ImpaladCatalog;
 import org.apache.impala.catalog.ImpaladTableUsageTracker;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.catalog.local.InconsistentMetadataFetchException;
-import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.InternalException;
@@ -102,7 +98,6 @@ import org.apache.impala.planner.HdfsScanNode;
 import org.apache.impala.planner.PlanFragment;
 import org.apache.impala.planner.Planner;
 import org.apache.impala.planner.ScanNode;
-import org.apache.impala.thrift.TAdminRequest;
 import org.apache.impala.thrift.TAlterDbParams;
 import org.apache.impala.thrift.TCatalogOpRequest;
 import org.apache.impala.thrift.TCatalogOpType;
@@ -125,6 +120,7 @@ import org.apache.impala.thrift.TGrantRevokeRoleParams;
 import org.apache.impala.thrift.TLineageGraph;
 import org.apache.impala.thrift.TLoadDataReq;
 import org.apache.impala.thrift.TLoadDataResp;
+import org.apache.impala.thrift.TCopyTestCaseReq;
 import org.apache.impala.thrift.TMetadataOpRequest;
 import org.apache.impala.thrift.TPlanExecInfo;
 import org.apache.impala.thrift.TPlanFragment;
@@ -138,7 +134,6 @@ import org.apache.impala.thrift.TResultSet;
 import org.apache.impala.thrift.TResultSetMetadata;
 import org.apache.impala.thrift.TShowFilesParams;
 import org.apache.impala.thrift.TShowStatsOp;
-import org.apache.impala.thrift.TShutdownParams;
 import org.apache.impala.thrift.TStmtType;
 import org.apache.impala.thrift.TTableName;
 import org.apache.impala.thrift.TUpdateCatalogCacheRequest;
@@ -606,6 +601,14 @@ public class Frontend {
       req.setAlter_db_params(params);
       ddl.op_type = TCatalogOpType.DDL;
       ddl.setDdl_params(req);
+    } else if (analysis.isTestCaseStmt()) {
+      CopyTestCaseStmt stmt = (CopyTestCaseStmt) analysis.getStmt();
+      TCopyTestCaseReq req = new TCopyTestCaseReq(stmt.getHdfsPath());
+      TDdlExecRequest ddlReq = new TDdlExecRequest();
+      ddlReq.setCopy_test_case_params(req);
+      ddlReq.setDdl_type(TDdlType.COPY_TESTCASE);
+      ddl.op_type = TCatalogOpType.DDL;
+      ddl.setDdl_params(ddlReq);
     } else {
       throw new IllegalStateException("Unexpected CatalogOp statement type.");
     }
@@ -1318,7 +1321,21 @@ public class Frontend {
           new TColumn("summary", Type.STRING.toThrift()))));
       result.setAdmin_request(analysisResult.getAdminFnStmt().toThrift());
       return result;
+    } else if (analysisResult.isTestCaseStmt()) {
+      CopyTestCaseStmt testCaseStmt = ((CopyTestCaseStmt) stmt);
+      if (testCaseStmt.isTestCaseExport()) {
+        result.setStmt_type(TStmtType.TESTCASE);
+        result.setResult_set_metadata(new TResultSetMetadata(Arrays.asList(
+          new TColumn("Test case data output path", Type.STRING.toThrift()))));
+        result.setTestcase_data_path(testCaseStmt.writeTestCaseData());
+      } else {
+        // Mimic it as a DDL.
+        result.setStmt_type(TStmtType.DDL);
+        createCatalogOpRequest(analysisResult, result);
+      }
+      return result;
     }
+
     // If unset, set MT_DOP to 0 to simplify the rest of the code.
     if (!queryOptions.isSetMt_dop()) queryOptions.setMt_dop(0);
 
diff --git a/fe/src/main/jflex/sql-scanner.flex b/fe/src/main/jflex/sql-scanner.flex
index 54ab07e..0867787 100644
--- a/fe/src/main/jflex/sql-scanner.flex
+++ b/fe/src/main/jflex/sql-scanner.flex
@@ -96,6 +96,7 @@ import org.apache.impala.thrift.TReservedWordsVersion;
     keywordMap.put("comment", SqlParserSymbols.KW_COMMENT);
     keywordMap.put("compression", SqlParserSymbols.KW_COMPRESSION);
     keywordMap.put("compute", SqlParserSymbols.KW_COMPUTE);
+    keywordMap.put("copy", SqlParserSymbols.KW_COPY);
     keywordMap.put("create", SqlParserSymbols.KW_CREATE);
     keywordMap.put("cross", SqlParserSymbols.KW_CROSS);
     keywordMap.put("current", SqlParserSymbols.KW_CURRENT);
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
index 4b78d76..a562143 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
@@ -310,6 +310,25 @@ public class AnalyzerTest extends FrontendTestBase {
   }
 
   @Test
+  public void TestCopyTestCase() {
+    AnalyzesOk("copy testcase to 'hdfs:///tmp' select * from functional.alltypes");
+    AnalyzesOk("copy testcase to 'hdfs:///tmp' select * from functional.alltypes union " +
+        "select * from functional.alltypes");
+    // Containing views
+    AnalyzesOk("copy testcase to 'hdfs:///tmp' select * from functional.alltypes_view");
+    // Mix of view and table
+    AnalyzesOk("copy testcase to 'hdfs:///tmp' select * from functional.alltypes_view " +
+        "union all select * from functional.alltypes");
+    AnalyzesOk("copy testcase to 'hdfs:///tmp' with v as (select 1) select * from v");
+    // Target directory does not exist
+    AnalysisError("copy testcase to 'hdfs:///foo' select 1", "Path does not exist: " +
+        "hdfs://localhost:20500/foo");
+    // Testcase file does not exist
+    AnalysisError("copy testcase from 'hdfs:///tmp/file-doesnot-exist'", "Path does not" +
+        " exist");
+  }
+
+  @Test
   public void TestBinaryHBaseTable() {
     AnalyzesOk("select * from functional_hbase.alltypessmallbinary");
   }
diff --git a/fe/src/test/java/org/apache/impala/analysis/AuthorizationStmtTest.java b/fe/src/test/java/org/apache/impala/analysis/AuthorizationStmtTest.java
index 541310f..d2e7a2a 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AuthorizationStmtTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AuthorizationStmtTest.java
@@ -339,6 +339,99 @@ public class AuthorizationStmtTest extends FrontendTestBase {
   }
 
   @Test
+  public void testCopyTestCasePrivileges() throws ImpalaException {
+    // Used for select *, with, and union
+    Set<String> expectedAuthorizables = Sets.newHashSet(
+        "functional", // For including the DB related metadata in the testcase file.
+        "functional.alltypes",
+        "functional.alltypes.id",
+        "functional.alltypes.bool_col",
+        "functional.alltypes.tinyint_col",
+        "functional.alltypes.smallint_col",
+        "functional.alltypes.int_col",
+        "functional.alltypes.bigint_col",
+        "functional.alltypes.float_col",
+        "functional.alltypes.double_col",
+        "functional.alltypes.date_string_col",
+        "functional.alltypes.string_col",
+        "functional.alltypes.timestamp_col",
+        "functional.alltypes.year",
+        "functional.alltypes.month",
+        "hdfs://localhost:20500/tmp" // For the testcase output URI
+    );
+
+    // Select *
+    verifyPrivilegeReqs("copy testcase to '/tmp' select * from functional" +
+        ".alltypes", expectedAuthorizables);
+    verifyPrivilegeReqs("copy testcase to '/tmp' select alltypes.* from " +
+        "functional.alltypes", expectedAuthorizables);
+    verifyPrivilegeReqs(createAnalysisCtx("functional"), "copy testcase to " +
+        "'/tmp'  select * from alltypes", expectedAuthorizables);
+    verifyPrivilegeReqs(createAnalysisCtx("functional"),
+        "copy testcase to '/tmp' select alltypes.* from alltypes",
+        expectedAuthorizables);
+    verifyPrivilegeReqs("copy testcase to '/tmp' select a.* from functional" +
+        ".alltypes a", expectedAuthorizables);
+
+    // With clause.
+    verifyPrivilegeReqs("copy testcase to '/tmp' with t as (select * from " +
+        "functional.alltypes) select * from t", expectedAuthorizables);
+    verifyPrivilegeReqs(createAnalysisCtx("functional"),
+        "copy testcase to '/tmp' with t as (select * from alltypes) select * " +
+            "from t", expectedAuthorizables);
+
+    // Union.
+    verifyPrivilegeReqs("copy testcase to '/tmp' select * from functional" +
+        ".alltypes union all select * from functional.alltypes", expectedAuthorizables);
+    verifyPrivilegeReqs(createAnalysisCtx("functional"), "copy testcase to '/tmp' " +
+        "select * from alltypes union all select * from alltypes",
+        expectedAuthorizables);
+
+    // Select a specific column.
+    expectedAuthorizables = Sets.newHashSet(
+        "functional",
+        "functional.alltypes",
+        "functional.alltypes.id",
+        "hdfs://localhost:20500/tmp"
+    );
+    verifyPrivilegeReqs("copy testcase to '/tmp' select id from " +
+        "functional.alltypes", expectedAuthorizables);
+    verifyPrivilegeReqs("copy testcase to '/tmp' select alltypes.id from " +
+            "functional.alltypes", expectedAuthorizables);
+    verifyPrivilegeReqs(createAnalysisCtx("functional"),
+        "copy testcase to '/tmp' select alltypes.id from alltypes",
+        expectedAuthorizables);
+    verifyPrivilegeReqs(createAnalysisCtx("functional"), "copy testcase to " +
+            "'/tmp' select id from alltypes", expectedAuthorizables);
+    verifyPrivilegeReqs("copy testcase to '/tmp' select alltypes.id from " +
+            "functional.alltypes", expectedAuthorizables);
+    verifyPrivilegeReqs("copy testcase to '/tmp' select a.id from functional" +
+        ".alltypes a", expectedAuthorizables);
+
+    // Verify VIEW_METADATA privileges on authorizables.
+    final String copyTestCasePrefix = "copy testcase to '/tmp' ";
+    for (AuthzTest authzTest: new AuthzTest[] {
+        authorize(copyTestCasePrefix + "with t as (select id from functional.alltypes) " +
+            "select * from t"),
+        authorize(copyTestCasePrefix + "select id from functional.alltypes")}) {
+      authzTest
+          // Ideal case, when all the privileges are in place.
+          .ok(onUri(false, "/tmp", TPrivilegeLevel.ALL),
+              onDatabase("functional", viewMetadataPrivileges()),
+              onTable("functional", "alltypes", viewMetadataPrivileges()))
+          // DB does not have the metadata access privileges
+          .error(accessError("functional"),
+              onUri(false, "/tmp", TPrivilegeLevel.ALL),
+              onDatabase("functional", allExcept(viewMetadataPrivileges())),
+              onTable("functional", "alltypes", viewMetadataPrivileges()))
+          // URI does not have ALL privilege.
+          .error(accessError("hdfs://localhost:20500/tmp"),
+              onDatabase("functional", viewMetadataPrivileges()),
+              onTable("functional", "alltypes", viewMetadataPrivileges()));
+    }
+  }
+
+  @Test
   public void testSelect() throws ImpalaException {
     for (AuthzTest authzTest: new AuthzTest[]{
         // Select a specific column on a table.
diff --git a/fe/src/test/java/org/apache/impala/analysis/ParserTest.java b/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
index 9d4e977..e050ea7 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ParserTest.java
@@ -90,6 +90,28 @@ public class ParserTest extends FrontendTestBase {
   }
 
   @Test
+  public void TestCopyTestCase() {
+    // Only QueryStmts are supported
+    ParsesOk("copy testcase to 'hdfs:///foo' select * from tbl");
+    ParsesOk("copy testcase to 'hdfs:///foo' with v as (select 1) select * from v");
+    ParsesOk("copy testcase to 'hdfs:///foo' select * from t1 union select * from t2");
+    // Non-QueryStmts are not supported.
+    ParserError("copy testcase to 'hdfs:///foo' alter table foo add partition (p=1)");
+    ParserError("copy testcase to 'hdfs:///foo' insert into t values (1)");
+    // missing output directory.
+    ParserError("copy testcase to select * from tbl");
+    // missing quotes for the directory path.
+    ParserError("copy testcase to hdfs:///foo select * from tbl");
+
+    ParsesOk("copy testcase from 'hdfs:///foo'");
+    // missing quotes.
+    ParserError("copy testcase from hdfs:///foo");
+    ParserError("copy testcase");
+    // testcase is not a reserved word.
+    ParsesOk("select testcase from foo");
+  }
+
+  @Test
   public void TestNoFromClause() {
     ParsesOk("select 1 + 1, 'two', f(3), a + b");
     ParserError("select 1 + 1 'two' f(3) a + b");
@@ -3262,9 +3284,9 @@ public class ParserTest extends FrontendTestBase {
         "c, b, c from t\n" +
         "^\n" +
         "Encountered: IDENTIFIER\n" +
-        "Expected: ALTER, COMMENT, COMPUTE, CREATE, DELETE, DESCRIBE, DROP, EXPLAIN, " +
-        "GRANT, INSERT, INVALIDATE, LOAD, REFRESH, REVOKE, SELECT, SET, SHOW, " +
-        "TRUNCATE, UPDATE, UPSERT, USE, VALUES, WITH\n");
+        "Expected: ALTER, COMMENT, COMPUTE, COPY, CREATE, DELETE, DESCRIBE, DROP, " +
+            "EXPLAIN, GRANT, INSERT, INVALIDATE, LOAD, REFRESH, REVOKE, SELECT, SET, " +
+            "SHOW, TRUNCATE, UPDATE, UPSERT, USE, VALUES, WITH\n");
 
     // missing select list
     ParserError("select from t",
diff --git a/fe/src/test/java/org/apache/impala/planner/TestCaseLoaderTest.java b/fe/src/test/java/org/apache/impala/planner/TestCaseLoaderTest.java
new file mode 100644
index 0000000..ff289c4
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/planner/TestCaseLoaderTest.java
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.planner;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.impala.catalog.Catalog;
+import org.apache.impala.catalog.Db;
+import org.apache.impala.common.FileSystemUtil;
+import org.apache.impala.testutil.PlannerTestCaseLoader;
+import org.apache.impala.util.PatternMatcher;
+import org.apache.log4j.Logger;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.Random;
+
+public class TestCaseLoaderTest {
+  private static final Logger LOG = Logger.getLogger(TestCaseLoaderTest.class);
+
+  // Testcase files are loaded along with the test-data snapshot. Refer to
+  // create-tpcds-testcase-files.sh for details.
+  private static final Path TESTCASE_DATA_DIR = new
+      Path("/test-warehouse/tpcds-testcase-data");
+
+  private static final String TESTCASE_FILE_PREFIX = "impala-testcase-data";
+
+  /**
+   * Randomly picks 10 testcase files from TESTCASE_DATA_DIR, loads each into a
+   * clean Catalog, and makes sure that the query statement from the testcase can
+   * be planned correctly without any errors.
+   */
+  @Test
+  public void testTestCaseImport() throws Exception {
+    FileStatus[] testCaseFiles = FileSystemUtil.listStatus(FileSystemUtil
+        .getFileSystemForPath(TESTCASE_DATA_DIR), TESTCASE_DATA_DIR);
+    // Randomly pick testcases and try to replay them.
+    Random rand = new Random();
+    int maxIterations = 10;
+    Preconditions.checkState(testCaseFiles.length > maxIterations);
+    for (int i = 0; i < maxIterations; ++i) {
+      FileStatus fs = testCaseFiles[rand.nextInt(testCaseFiles.length)];
+      if (!fs.getPath().getName().contains(TESTCASE_FILE_PREFIX)) continue;
+      try (PlannerTestCaseLoader testCaseLoader = new PlannerTestCaseLoader()) {
+        Catalog srcCatalog = testCaseLoader.getSrcCatalog();
+        // Make sure the catalog is empty (just the default database).
+        List<Db> dbs = srcCatalog.getDbs(PatternMatcher.MATCHER_MATCH_ALL);
+        assert dbs.size() == 1 && dbs.get(0).getName().equals("default");
+        // TODO: Include the source cluster plan in the testcase file and compare it
+        // here with the plan computed from the local metadata.
+        LOG.info(testCaseLoader.loadTestCase(fs.getPath().toString()));
+        dbs = srcCatalog.getDbs(PatternMatcher.MATCHER_MATCH_ALL);
+        // At least one new database should be loaded per testcase.
+        assert dbs.size() > 1;
+      }
+    }
+  }
+}
+
diff --git a/fe/src/test/java/org/apache/impala/testutil/CatalogServiceTestCatalog.java b/fe/src/test/java/org/apache/impala/testutil/CatalogServiceTestCatalog.java
index 3310ecf..ba36fdc 100644
--- a/fe/src/test/java/org/apache/impala/testutil/CatalogServiceTestCatalog.java
+++ b/fe/src/test/java/org/apache/impala/testutil/CatalogServiceTestCatalog.java
@@ -20,10 +20,15 @@ package org.apache.impala.testutil;
 import org.apache.impala.authorization.SentryConfig;
 import org.apache.impala.catalog.AuthorizationPolicy;
 import org.apache.impala.catalog.CatalogServiceCatalog;
+import org.apache.impala.catalog.MetaStoreClientPool;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.service.FeSupport;
 import org.apache.impala.thrift.TUniqueId;
 
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.UUID;
+
 /**
  * Test class of the Catalog Server's catalog that exposes internal state that is useful
  * for testing.
@@ -31,10 +36,10 @@ import org.apache.impala.thrift.TUniqueId;
 public class CatalogServiceTestCatalog extends CatalogServiceCatalog {
 
   public CatalogServiceTestCatalog(boolean loadInBackground, int numLoadingThreads,
-      int initialHmsCnxnTimeoutSec, SentryConfig sentryConfig,
-      TUniqueId catalogServiceId) throws ImpalaException {
-    super(loadInBackground, numLoadingThreads, initialHmsCnxnTimeoutSec, sentryConfig,
-        catalogServiceId, null, System.getProperty("java.io.tmpdir"));
+      SentryConfig sentryConfig, TUniqueId catalogServiceId,
+      MetaStoreClientPool metaStoreClientPool) throws ImpalaException {
+    super(loadInBackground, numLoadingThreads, sentryConfig, catalogServiceId, null,
+        System.getProperty("java.io.tmpdir"), metaStoreClientPool);
 
     // Cache pools are typically loaded asynchronously, but as there is no fixed execution
     // order for tests, the cache pools are loaded synchronously before the tests are
@@ -55,7 +60,8 @@ public class CatalogServiceTestCatalog extends CatalogServiceCatalog {
     FeSupport.loadLibrary();
     CatalogServiceCatalog cs;
     try {
-      cs = new CatalogServiceTestCatalog(false, 16, 0, config, new TUniqueId());
+      cs = new CatalogServiceTestCatalog(false, 16, config, new TUniqueId(), new
+          MetaStoreClientPool(0, 0));
       cs.reset();
     } catch (ImpalaException e) {
       throw new IllegalStateException(e.getMessage(), e);
@@ -63,6 +69,23 @@ public class CatalogServiceTestCatalog extends CatalogServiceCatalog {
     return cs;
   }
 
+
+  /**
+   * Creates a transient test catalog instance backed by an embedded HMS with a Derby
+   * database on the local filesystem. The Derby database is created from scratch and
+   * has no table metadata.
+   */
+  public static CatalogServiceCatalog createTransientTestCatalog() throws
+      ImpalaException {
+    FeSupport.loadLibrary();
+    Path derbyPath = Paths.get(System.getProperty("java.io.tmpdir"),
+        UUID.randomUUID().toString());
+    CatalogServiceCatalog cs = new CatalogServiceTestCatalog(false, 16, null,
+        new TUniqueId(), new EmbeddedMetastoreClientPool(0, derbyPath));
+    cs.reset();
+    return cs;
+  }
+
   @Override
   public AuthorizationPolicy getAuthPolicy() { return authPolicy_; }
 }
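
A usage sketch for the new factory (mirrors what TestCaseLoaderTest above does
via PlannerTestCaseLoader):

    import org.apache.impala.catalog.CatalogServiceCatalog;
    import org.apache.impala.common.ImpalaException;
    import org.apache.impala.testutil.CatalogServiceTestCatalog;

    class TransientCatalogSketch {
      static void run() throws ImpalaException {
        // Throwaway catalog backed by an embedded Derby HMS; starts with only
        // the "default" database.
        CatalogServiceCatalog catalog =
            CatalogServiceTestCatalog.createTransientTestCatalog();
        try {
          // ... exercise catalog code against the embedded HMS ...
        } finally {
          // Closes the backing client pool; for the embedded pool this also
          // deletes the Derby directory (see EmbeddedMetastoreClientPool#close).
          catalog.close();
        }
      }
    }
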
diff --git a/fe/src/test/java/org/apache/impala/testutil/EmbeddedMetastoreClientPool.java b/fe/src/test/java/org/apache/impala/testutil/EmbeddedMetastoreClientPool.java
new file mode 100644
index 0000000..7e68d8e
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/testutil/EmbeddedMetastoreClientPool.java
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.testutil;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.impala.catalog.MetaStoreClientPool;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.log4j.Logger;
+
+import java.nio.file.Path;
+
+/**
+ * An implementation of MetaStoreClientPool that creates HiveMetaStoreClient objects
+ * with the HMS running in an embedded fashion on the client side. It connects to a
+ * Derby database backed by local filesystem storage.
+ *
+ * Since a local Derby database allows only a single connection at a time, there is
+ * no point in creating a metastore client pool larger than one.
+ */
+public class EmbeddedMetastoreClientPool extends MetaStoreClientPool {
+
+  private static final Logger LOG = Logger.getLogger(EmbeddedMetastoreClientPool.class);
+
+  private static final String CONNECTION_URL_TEMPLATE =
+      "jdbc:derby:;databaseName=%s;create=true";
+
+  private Path derbyDataStorePath_;
+
+  public EmbeddedMetastoreClientPool(int initialCnxnTimeoutSec, Path dbStorePath) {
+    super(1, initialCnxnTimeoutSec, generateEmbeddedHMSConf(dbStorePath));
+    derbyDataStorePath_ = dbStorePath;
+  }
+
+  /**
+   * Generates the HiveConf required to connect to an embedded metastore backed by a
+   * Derby database.
+   */
+  private static HiveConf generateEmbeddedHMSConf(Path dbStorePath) {
+    LOG.info("Creating embedded HMS instance at path: " + dbStorePath);
+    HiveConf conf = new HiveConf(EmbeddedMetastoreClientPool.class);
+    // An embedded HMS with local derby backend requires the following settings
+    // hive.metastore.uris - empty
+    // javax.jdo.option.ConnectionDriverName - org.apache.derby.jdbc.EmbeddedDriver
+    // javax.jdo.option.ConnectionURL - jdbc:derby:;databaseName=<path>;create=true
+    conf.set(HiveConf.ConfVars.METASTOREURIS.toString(), "");
+    conf.set(HiveConf.ConfVars.METASTORE_CONNECTION_DRIVER.toString(),
+        "org.apache.derby.jdbc.EmbeddedDriver");
+    conf.setBoolean(HiveConf.ConfVars.METASTORE_SCHEMA_VERIFICATION.toString(), false);
+    conf.setBoolean(HiveConf.ConfVars.METASTORE_AUTO_CREATE_ALL.toString(), true);
+    conf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.toString(),
+        String.format(CONNECTION_URL_TEMPLATE, dbStorePath.toString()));
+    return conf;
+  }
+
+  @Override
+  public void close() {
+    super.close();
+    // Cleanup the metastore directory.
+    FileUtils.deleteQuietly(derbyDataStorePath_.toFile());
+  }
+}
diff --git a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
index 3395c54..1683b2c 100644
--- a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
+++ b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
@@ -20,10 +20,10 @@ package org.apache.impala.testutil;
 import com.google.common.base.Preconditions;
 import org.apache.impala.analysis.TableName;
 import org.apache.impala.authorization.AuthorizationConfig;
+import org.apache.impala.catalog.BuiltinsDb;
 import org.apache.impala.catalog.CatalogException;
 import org.apache.impala.catalog.CatalogServiceCatalog;
 import org.apache.impala.catalog.Db;
-import org.apache.impala.catalog.FeDb;
 import org.apache.impala.catalog.HdfsCachePool;
 import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.catalog.ImpaladCatalog;
@@ -35,6 +35,7 @@ import org.apache.impala.thrift.TPrivilege;
 import org.apache.impala.util.PatternMatcher;
 
 import java.util.HashSet;
+import java.util.List;
 import java.util.Set;
 
 /**
@@ -57,21 +58,54 @@ public class ImpaladTestCatalog extends ImpaladCatalog {
     CatalogServiceCatalog catalogServerCatalog = authzConfig.isEnabled() ?
         CatalogServiceTestCatalog.createWithAuth(authzConfig.getSentryConfig()) :
         CatalogServiceTestCatalog.create();
-    // Bootstrap the catalog by adding all dbs, tables, and functions.
-    for (FeDb db: catalogServerCatalog.getDbs(PatternMatcher.MATCHER_MATCH_ALL)) {
-      // Adding DB should include all tables/fns in that database.
-      addDb((Db)db);
-    }
     authPolicy_ = catalogServerCatalog.getAuthPolicy();
     srcCatalog_ = catalogServerCatalog;
+    srcCatalog_.addDb(BuiltinsDb.getInstance());
+    setIsReady(true);
+  }
+
+  /**
+   * Creates ImpaladTestCatalog backed by a given catalog instance.
+   */
+  public ImpaladTestCatalog(CatalogServiceCatalog catalog) {
+    super("127.0.0.1");
+    srcCatalog_ = Preconditions.checkNotNull(catalog);
+    authPolicy_ = srcCatalog_.getAuthPolicy();
     setIsReady(true);
   }
 
   @Override
+  public void addDb(Db db) {
+    // Builtins are loaded explicitly after the srcCatalog_ is initialized.
+    if (db == BuiltinsDb.getInstance()) return;
+    srcCatalog_.addDb(db);
+  }
+
+  @Override
+  public Db removeDb(String dbName) {
+    return srcCatalog_.removeDb(dbName);
+  }
+
+  /**
+   * Delegates the getDb() request to the source catalog.
+   */
+  public Db getDb(String dbName) {
+    if (dbName.equals(BuiltinsDb.NAME)) return BuiltinsDb.getInstance();
+    return srcCatalog_.getDb(dbName);
+  }
+
+  @Override
+  public List<Db> getDbs(PatternMatcher matcher) {
+    return srcCatalog_.getDbs(matcher);
+  }
+
+  @Override
   public HdfsCachePool getHdfsCachePool(String poolName) {
     return srcCatalog_.getHdfsCachePool(poolName);
   }
 
+  public CatalogServiceCatalog getSrcCatalog() { return srcCatalog_; }
+
   /**
    * Reloads all metadata from the source catalog.
    */
diff --git a/fe/src/test/java/org/apache/impala/testutil/PlannerTestCaseLoader.java b/fe/src/test/java/org/apache/impala/testutil/PlannerTestCaseLoader.java
new file mode 100644
index 0000000..27a0604
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/testutil/PlannerTestCaseLoader.java
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.testutil;
+
+import org.apache.impala.authorization.AuthorizationConfig;
+import org.apache.impala.catalog.Catalog;
+import org.apache.impala.common.ImpalaException;
+import org.apache.impala.service.BackendConfig;
+import org.apache.impala.service.CatalogOpExecutor;
+import org.apache.impala.service.Frontend;
+import org.apache.impala.thrift.TBackendGflags;
+import org.apache.impala.thrift.TCatalogUpdateResult;
+import org.apache.impala.thrift.TCopyTestCaseReq;
+import org.apache.impala.thrift.TDdlExecResponse;
+import org.apache.impala.thrift.TQueryCtx;
+import org.apache.impala.thrift.TQueryOptions;
+
+/**
+ * A utility class that loads a given testcase file into an in-memory Catalog
+ * and runs EXPLAIN on the testcase query statement. The catalog is backed by a
+ * transient HMS instance based on a Derby DB.
+ *
+ * Runs in a standalone mode without having to start the Impala cluster.
+ * Expects the testcase path as the only argument.
+ * Ex: ...PlannerTestCaseLoader hdfs:///tmp/impala-testcase-data-9642a6f6...
+ *
+ * Full command example using maven-exec plugin (for an Impala DEV environment):
+ *
+ * mvn install exec:java -Dexec.classpathScope="test" \
+ *    -Dexec.mainClass="org.apache.impala.testutil.PlannerTestCaseLoader" \
+ *    -Dexec.args="/tmp/impala-testcase-data-9642a6f6-6a0b-48b2-a61f-f7ce55b92fee"
+ *
+ * Running through Maven is just for convenience. It can also be invoked directly
+ * using the java binary with an appropriate classpath.
+ *
+ */
+public class PlannerTestCaseLoader implements AutoCloseable {
+
+  private final CatalogOpExecutor catalogOpExecutor_;
+  private final ImpaladTestCatalog catalog_;
+  private final Frontend frontend_;
+
+  public PlannerTestCaseLoader() throws ImpalaException {
+    AuthorizationConfig config = AuthorizationConfig.createAuthDisabledConfig();
+    catalog_ = new ImpaladTestCatalog(
+        CatalogServiceTestCatalog.createTransientTestCatalog());
+    frontend_ = new Frontend(config, catalog_);
+    catalogOpExecutor_ = new CatalogOpExecutor(catalog_.getSrcCatalog());
+  }
+
+  public Catalog getSrcCatalog() { return catalog_.getSrcCatalog(); }
+
+  /**
+   * Loads the testcase from a given path and returns the EXPLAIN string for the
+   * testcase query statement.
+   */
+  public String loadTestCase(String testCasePath) throws Exception {
+    String stmt = catalogOpExecutor_.copyTestCaseData(new TCopyTestCaseReq(testCasePath),
+        new TDdlExecResponse(new TCatalogUpdateResult()));
+    TQueryCtx queryCtx = TestUtils.createQueryContext(
+        new TQueryOptions().setPlanner_testcase_mode(true));
+    queryCtx.client_request.setStmt(stmt);
+    return frontend_.getExplainString(queryCtx);
+  }
+
+  @Override
+  public void close() {
+    getSrcCatalog().close();
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length != 1) {
+      throw new IllegalArgumentException(String.format("Incorrect number of args. " +
+          "Expected 1 argument, found %d. Valid usage: PlannerTestCaseLoader " +
+          "<testcase path>", args.length));
+    }
+    try (PlannerTestCaseLoader testCaseLoader = new PlannerTestCaseLoader()) {
+      System.out.println(testCaseLoader.loadTestCase(args[0]));
+    }
+    System.exit(0);
+  }
+}
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 8a26ad6..a32d44c 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -627,4 +627,7 @@ fi
 run-step "Computing table stats" compute-table-stats.log \
     ${IMPALA_HOME}/testdata/bin/compute-table-stats.sh
 
+run-step "Creating tpcds testcase data" create-tpcds-testcase-data.log \
+    ${IMPALA_HOME}/testdata/bin/create-tpcds-testcase-files.sh
+
 run-step "Copying auth policy file" copy-auth-policy.log copy-auth-policy
diff --git a/testdata/bin/create-tpcds-testcase-files.sh b/testdata/bin/create-tpcds-testcase-files.sh
new file mode 100755
index 0000000..42c6897
--- /dev/null
+++ b/testdata/bin/create-tpcds-testcase-files.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script generates testcase files for the TPC-DS queries.
+# These testcases can then be replayed against an empty catalog
+# (see org.apache.impala.testutil.PlannerTestCaseLoader) to verify
+# that the import works.
+
+set -euo pipefail
+. $IMPALA_HOME/bin/report_build_error.sh
+setup_report_build_error
+set -x
+
+. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
+IMPALAD=${IMPALAD:-localhost:21000}
+
+TPCDS_QUERY_HOME=$IMPALA_HOME/testdata/workloads/tpcds/queries/raw
+# Target directory containing the testcase data.
+TESTCASE_DATA_DIR=/test-warehouse/tpcds-testcase-data
+
+COPY_TEST_CASE_PREFIX="COPY TESTCASE TO '$TESTCASE_DATA_DIR'"
+
+# Clean up the target directory if it already exists.
+hadoop fs -rm -r -f $TESTCASE_DATA_DIR
+hadoop fs -mkdir $TESTCASE_DATA_DIR
+
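+# Each impala-shell invocation below issues a single statement of the form
+#   COPY TESTCASE TO '<TESTCASE_DATA_DIR>' <query text>
+# writing one testcase file for that query into the target directory.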
+for file in $TPCDS_QUERY_HOME/tpcds-query*.sql
+do
+  echo "Generating testcase for $file"
+  ${IMPALA_HOME}/bin/impala-shell.sh -i ${IMPALAD} \
+  -d "tpcds" -q "$COPY_TEST_CASE_PREFIX $(< $file)"
+done
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query1.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query1.sql
new file mode 100644
index 0000000..7c55db6
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query1.sql
@@ -0,0 +1,25 @@
+-- start query 1 in stream 0 using template query1.tpl
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100;
+
+-- end query 1 in stream 0 using template query1.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query11.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query11.sql
new file mode 100644
index 0000000..d3caf12
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query11.sql
@@ -0,0 +1,81 @@
+-- start query 1 in stream 0 using template query11.tpl
+with year_total as (
+ select c_customer_id customer_id
+       ,c_first_name customer_first_name
+       ,c_last_name customer_last_name
+       ,c_preferred_cust_flag customer_preferred_cust_flag
+       ,c_birth_country customer_birth_country
+       ,c_login customer_login
+       ,c_email_address customer_email_address
+       ,d_year dyear
+       ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total
+       ,'s' sale_type
+ from customer
+     ,store_sales
+     ,date_dim
+ where c_customer_sk = ss_customer_sk
+   and ss_sold_date_sk = d_date_sk
+ group by c_customer_id
+         ,c_first_name
+         ,c_last_name
+         ,c_preferred_cust_flag 
+         ,c_birth_country
+         ,c_login
+         ,c_email_address
+         ,d_year 
+ union all
+ select c_customer_id customer_id
+       ,c_first_name customer_first_name
+       ,c_last_name customer_last_name
+       ,c_preferred_cust_flag customer_preferred_cust_flag
+       ,c_birth_country customer_birth_country
+       ,c_login customer_login
+       ,c_email_address customer_email_address
+       ,d_year dyear
+       ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total
+       ,'w' sale_type
+ from customer
+     ,web_sales
+     ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+   and ws_sold_date_sk = d_date_sk
+ group by c_customer_id
+         ,c_first_name
+         ,c_last_name
+         ,c_preferred_cust_flag 
+         ,c_birth_country
+         ,c_login
+         ,c_email_address
+         ,d_year
+         )
+  select  
+                  t_s_secyear.customer_id
+                 ,t_s_secyear.customer_first_name
+                 ,t_s_secyear.customer_last_name
+                 ,t_s_secyear.customer_email_address
+ from year_total t_s_firstyear
+     ,year_total t_s_secyear
+     ,year_total t_w_firstyear
+     ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+         and t_s_firstyear.customer_id = t_w_secyear.customer_id
+         and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+         and t_s_firstyear.sale_type = 's'
+         and t_w_firstyear.sale_type = 'w'
+         and t_s_secyear.sale_type = 's'
+         and t_w_secyear.sale_type = 'w'
+         and t_s_firstyear.dyear = 2001
+         and t_s_secyear.dyear = 2001+1
+         and t_w_firstyear.dyear = 2001
+         and t_w_secyear.dyear = 2001+1
+         and t_s_firstyear.year_total > 0
+         and t_w_firstyear.year_total > 0
+         and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end
+             > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end
+ order by t_s_secyear.customer_id
+         ,t_s_secyear.customer_first_name
+         ,t_s_secyear.customer_last_name
+         ,t_s_secyear.customer_email_address
+limit 100;
+
+-- end query 1 in stream 0 using template query11.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query12.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query12.sql
new file mode 100644
index 0000000..94a5a9c
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query12.sql
@@ -0,0 +1,34 @@
+-- start query 1 in stream 0 using template query12.tpl
+select  i_item_id
+      ,i_item_desc 
+      ,i_category 
+      ,i_class 
+      ,i_current_price
+      ,sum(ws_ext_sales_price) as itemrevenue 
+      ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over
+          (partition by i_class) as revenueratio
+from	
+	web_sales
+    	,item 
+    	,date_dim
+where 
+	ws_item_sk = i_item_sk 
+  	and i_category in ('Jewelry', 'Sports', 'Books')
+  	and ws_sold_date_sk = d_date_sk
+	and cast(d_date as timestamp) between cast('2001-01-12' as timestamp) 
+				and (cast('2001-01-12' as timestamp) + interval 30 days)
+group by 
+	i_item_id
+        ,i_item_desc 
+        ,i_category
+        ,i_class
+        ,i_current_price
+order by 
+	i_category
+        ,i_class
+        ,i_item_id
+        ,i_item_desc
+        ,revenueratio
+limit 100;
+
+-- end query 1 in stream 0 using template query12.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query13.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query13.sql
new file mode 100644
index 0000000..cc14cc2
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query13.sql
@@ -0,0 +1,52 @@
+-- start query 1 in stream 0 using template query13.tpl
+select avg(ss_quantity)
+       ,avg(ss_ext_sales_price)
+       ,avg(ss_ext_wholesale_cost)
+       ,sum(ss_ext_wholesale_cost)
+ from store_sales
+     ,store
+     ,customer_demographics
+     ,household_demographics
+     ,customer_address
+     ,date_dim
+ where s_store_sk = ss_store_sk
+ and  ss_sold_date_sk = d_date_sk and d_year = 2001
+ and((ss_hdemo_sk=hd_demo_sk
+  and cd_demo_sk = ss_cdemo_sk
+  and cd_marital_status = 'D'
+  and cd_education_status = '2 yr Degree'
+  and ss_sales_price between 100.00 and 150.00
+  and hd_dep_count = 3   
+     )or
+     (ss_hdemo_sk=hd_demo_sk
+  and cd_demo_sk = ss_cdemo_sk
+  and cd_marital_status = 'S'
+  and cd_education_status = 'Secondary'
+  and ss_sales_price between 50.00 and 100.00   
+  and hd_dep_count = 1
+     ) or 
+     (ss_hdemo_sk=hd_demo_sk
+  and cd_demo_sk = ss_cdemo_sk
+  and cd_marital_status = 'W'
+  and cd_education_status = 'Advanced Degree'
+  and ss_sales_price between 150.00 and 200.00 
+  and hd_dep_count = 1  
+     ))
+ and((ss_addr_sk = ca_address_sk
+  and ca_country = 'United States'
+  and ca_state in ('CO', 'IL', 'MN')
+  and ss_net_profit between 100 and 200  
+     ) or
+     (ss_addr_sk = ca_address_sk
+  and ca_country = 'United States'
+  and ca_state in ('OH', 'MT', 'NM')
+  and ss_net_profit between 150 and 300  
+     ) or
+     (ss_addr_sk = ca_address_sk
+  and ca_country = 'United States'
+  and ca_state in ('TX', 'MO', 'MI')
+  and ss_net_profit between 50 and 250  
+     ))
+;
+
+-- end query 1 in stream 0 using template query13.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query15.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query15.sql
new file mode 100644
index 0000000..44de8f4
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query15.sql
@@ -0,0 +1,20 @@
+-- start query 1 in stream 0 using template query15.tpl
+select  ca_zip
+       ,sum(cs_sales_price)
+ from catalog_sales
+     ,customer
+     ,customer_address
+     ,date_dim
+ where cs_bill_customer_sk = c_customer_sk
+ 	and c_current_addr_sk = ca_address_sk 
+ 	and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475',
+                                   '85392', '85460', '80348', '81792')
+ 	      or ca_state in ('CA','WA','GA')
+ 	      or cs_sales_price > 500)
+ 	and cs_sold_date_sk = d_date_sk
+ 	and d_qoy = 2 and d_year = 2000
+ group by ca_zip
+ order by ca_zip
+ limit 100;
+
+-- end query 1 in stream 0 using template query15.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query16.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query16.sql
new file mode 100644
index 0000000..109c83f
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query16.sql
@@ -0,0 +1,31 @@
+-- start query 1 in stream 0 using template query16.tpl
+select  
+   count(distinct cs_order_number) as "order count"
+  ,sum(cs_ext_ship_cost) as "total shipping cost"
+  ,sum(cs_net_profit) as "total net profit"
+from
+   catalog_sales cs1
+  ,date_dim
+  ,customer_address
+  ,call_center
+where
+    cast(d_date as timestamp) between cast('1999-04-01' as timestamp) and 
+           (cast('1999-04-01' as timestamp) + interval 60 days)
+and cs1.cs_ship_date_sk = d_date_sk
+and cs1.cs_ship_addr_sk = ca_address_sk
+and ca_state = 'IA'
+and cs1.cs_call_center_sk = cc_call_center_sk
+and cc_county in ('Mobile County','Maverick County','Huron County','Kittitas County',
+                  'Fairfield County'
+)
+and exists (select *
+            from catalog_sales cs2
+            where cs1.cs_order_number = cs2.cs_order_number
+              and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk)
+and not exists(select *
+               from catalog_returns cr1
+               where cs1.cs_order_number = cr1.cr_order_number)
+order by count(distinct cs_order_number)
+limit 100;
+
+-- end query 1 in stream 0 using template query16.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query17.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query17.sql
new file mode 100644
index 0000000..14fc801
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query17.sql
@@ -0,0 +1,45 @@
+-- start query 1 in stream 0 using template query17.tpl
+select  i_item_id
+       ,i_item_desc
+       ,s_state
+       ,count(ss_quantity) as store_sales_quantitycount
+       ,avg(ss_quantity) as store_sales_quantityave
+       ,stddev_samp(ss_quantity) as store_sales_quantitystdev
+       ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov
+       ,count(sr_return_quantity) as store_returns_quantitycount
+       ,avg(sr_return_quantity) as store_returns_quantityave
+       ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev
+       ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov
+       ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave
+       ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev
+       ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov
+ from store_sales
+     ,store_returns
+     ,catalog_sales
+     ,date_dim d1
+     ,date_dim d2
+     ,date_dim d3
+     ,store
+     ,item
+ where d1.d_quarter_name = '1998Q1'
+   and d1.d_date_sk = ss_sold_date_sk
+   and i_item_sk = ss_item_sk
+   and s_store_sk = ss_store_sk
+   and ss_customer_sk = sr_customer_sk
+   and ss_item_sk = sr_item_sk
+   and ss_ticket_number = sr_ticket_number
+   and sr_returned_date_sk = d2.d_date_sk
+   and d2.d_quarter_name in ('1998Q1','1998Q2','1998Q3')
+   and sr_customer_sk = cs_bill_customer_sk
+   and sr_item_sk = cs_item_sk
+   and cs_sold_date_sk = d3.d_date_sk
+   and d3.d_quarter_name in ('1998Q1','1998Q2','1998Q3')
+ group by i_item_id
+         ,i_item_desc
+         ,s_state
+ order by i_item_id
+         ,i_item_desc
+         ,s_state
+limit 100;
+
+-- end query 1 in stream 0 using template query17.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query19.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query19.sql
new file mode 100644
index 0000000..9bf5de8
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query19.sql
@@ -0,0 +1,25 @@
+-- start query 1 in stream 0 using template query19.tpl
+select  i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact,
+ 	sum(ss_ext_sales_price) ext_price
+ from date_dim, store_sales, item,customer,customer_address,store
+ where d_date_sk = ss_sold_date_sk
+   and ss_item_sk = i_item_sk
+   and i_manager_id=7
+   and d_moy=11
+   and d_year=1999
+   and ss_customer_sk = c_customer_sk 
+   and c_current_addr_sk = ca_address_sk
+   and substr(ca_zip,1,5) <> substr(s_zip,1,5) 
+   and ss_store_sk = s_store_sk 
+ group by i_brand
+      ,i_brand_id
+      ,i_manufact_id
+      ,i_manufact
+ order by ext_price desc
+         ,i_brand
+         ,i_brand_id
+         ,i_manufact_id
+         ,i_manufact
+limit 100 ;
+
+-- end query 1 in stream 0 using template query19.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query2.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query2.sql
new file mode 100644
index 0000000..3188cb6
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query2.sql
@@ -0,0 +1,60 @@
+-- start query 1 in stream 0 using template query2.tpl
+with wscs as
+ (select sold_date_sk
+        ,sales_price
+  from  (select ws_sold_date_sk sold_date_sk
+              ,ws_ext_sales_price sales_price
+        from web_sales 
+        union all
+        select cs_sold_date_sk sold_date_sk
+              ,cs_ext_sales_price sales_price
+        from catalog_sales) x),
+ wswscs as 
+ (select d_week_seq,
+        sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales,
+        sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales,
+        sum(case when (d_day_name='Tuesday') then sales_price else  null end) tue_sales,
+        sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales,
+        sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales,
+        sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales,
+        sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales
+ from wscs
+     ,date_dim
+ where d_date_sk = sold_date_sk
+ group by d_week_seq)
+ select d_week_seq1
+       ,round(sun_sales1/sun_sales2,2)
+       ,round(mon_sales1/mon_sales2,2)
+       ,round(tue_sales1/tue_sales2,2)
+       ,round(wed_sales1/wed_sales2,2)
+       ,round(thu_sales1/thu_sales2,2)
+       ,round(fri_sales1/fri_sales2,2)
+       ,round(sat_sales1/sat_sales2,2)
+ from
+ (select wswscs.d_week_seq d_week_seq1
+        ,sun_sales sun_sales1
+        ,mon_sales mon_sales1
+        ,tue_sales tue_sales1
+        ,wed_sales wed_sales1
+        ,thu_sales thu_sales1
+        ,fri_sales fri_sales1
+        ,sat_sales sat_sales1
+  from wswscs,date_dim 
+  where date_dim.d_week_seq = wswscs.d_week_seq and
+        d_year = 2001) y,
+ (select wswscs.d_week_seq d_week_seq2
+        ,sun_sales sun_sales2
+        ,mon_sales mon_sales2
+        ,tue_sales tue_sales2
+        ,wed_sales wed_sales2
+        ,thu_sales thu_sales2
+        ,fri_sales fri_sales2
+        ,sat_sales sat_sales2
+  from wswscs
+      ,date_dim 
+  where date_dim.d_week_seq = wswscs.d_week_seq and
+        d_year = 2001+1) z
+ where d_week_seq1=d_week_seq2-53
+ order by d_week_seq1;
+
+-- end query 1 in stream 0 using template query2.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query20.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query20.sql
new file mode 100644
index 0000000..7117e8e
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query20.sql
@@ -0,0 +1,30 @@
+-- start query 1 in stream 0 using template query20.tpl
+select  i_item_id
+       ,i_item_desc 
+       ,i_category 
+       ,i_class 
+       ,i_current_price
+       ,sum(cs_ext_sales_price) as itemrevenue 
+       ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over
+           (partition by i_class) as revenueratio
+ from	catalog_sales
+     ,item 
+     ,date_dim
+ where cs_item_sk = i_item_sk 
+   and i_category in ('Jewelry', 'Sports', 'Books')
+   and cs_sold_date_sk = d_date_sk
+ and cast(d_date as timestamp) between cast('2001-01-12' as timestamp) 
+ 				and (cast('2001-01-12' as timestamp) + interval 30 days)
+ group by i_item_id
+         ,i_item_desc 
+         ,i_category
+         ,i_class
+         ,i_current_price
+ order by i_category
+         ,i_class
+         ,i_item_id
+         ,i_item_desc
+         ,revenueratio
+limit 100;
+
+-- end query 1 in stream 0 using template query20.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query21.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query21.sql
new file mode 100644
index 0000000..fcc2486
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query21.sql
@@ -0,0 +1,30 @@
+-- start query 1 in stream 0 using template query21.tpl
+select  *
+ from(select w_warehouse_name
+            ,i_item_id
+            ,sum(case when (cast(d_date as timestamp) < cast ('1998-04-08' as timestamp))
+	                then inv_quantity_on_hand 
+                      else 0 end) as inv_before
+            ,sum(case when (cast(d_date as timestamp) >= cast ('1998-04-08' as timestamp))
+                      then inv_quantity_on_hand 
+                      else 0 end) as inv_after
+   from inventory
+       ,warehouse
+       ,item
+       ,date_dim
+   where i_current_price between 0.99 and 1.49
+     and i_item_sk          = inv_item_sk
+     and inv_warehouse_sk   = w_warehouse_sk
+     and inv_date_sk    = d_date_sk
+     and cast(d_date as timestamp) between (cast ('1998-04-08' as timestamp) - interval 30 days)
+                    and (cast ('1998-04-08' as timestamp) + interval 30 days)
+   group by w_warehouse_name, i_item_id) x
+ where (case when inv_before > 0 
+             then inv_after / inv_before 
+             else null
+             end) between 2.0/3.0 and 3.0/2.0
+ order by w_warehouse_name
+         ,i_item_id
+ limit 100;
+
+-- end query 1 in stream 0 using template query21.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query25.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query25.sql
new file mode 100644
index 0000000..11695c1
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query25.sql
@@ -0,0 +1,48 @@
+-- start query 1 in stream 0 using template query25.tpl
+select  
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ ,sum(ss_net_profit) as store_sales_profit
+ ,sum(sr_net_loss) as store_returns_loss
+ ,sum(cs_net_profit) as catalog_sales_profit
+ from
+ store_sales
+ ,store_returns
+ ,catalog_sales
+ ,date_dim d1
+ ,date_dim d2
+ ,date_dim d3
+ ,store
+ ,item
+ where
+ d1.d_moy = 4
+ and d1.d_year = 2000
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk = ss_item_sk
+ and s_store_sk = ss_store_sk
+ and ss_customer_sk = sr_customer_sk
+ and ss_item_sk = sr_item_sk
+ and ss_ticket_number = sr_ticket_number
+ and sr_returned_date_sk = d2.d_date_sk
+ and d2.d_moy               between 4 and  10
+ and d2.d_year              = 2000
+ and sr_customer_sk = cs_bill_customer_sk
+ and sr_item_sk = cs_item_sk
+ and cs_sold_date_sk = d3.d_date_sk
+ and d3.d_moy               between 4 and  10 
+ and d3.d_year              = 2000
+ group by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ order by
+ i_item_id
+ ,i_item_desc
+ ,s_store_id
+ ,s_store_name
+ limit 100;
+
+-- end query 1 in stream 0 using template query25.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query26.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query26.sql
new file mode 100644
index 0000000..180ee4f
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query26.sql
@@ -0,0 +1,21 @@
+-- start query 1 in stream 0 using template query26.tpl
+select  i_item_id, 
+        avg(cs_quantity) agg1,
+        avg(cs_list_price) agg2,
+        avg(cs_coupon_amt) agg3,
+        avg(cs_sales_price) agg4 
+ from catalog_sales, customer_demographics, date_dim, item, promotion
+ where cs_sold_date_sk = d_date_sk and
+       cs_item_sk = i_item_sk and
+       cs_bill_cdemo_sk = cd_demo_sk and
+       cs_promo_sk = p_promo_sk and
+       cd_gender = 'F' and 
+       cd_marital_status = 'W' and
+       cd_education_status = 'Primary' and
+       (p_channel_email = 'N' or p_channel_event = 'N') and
+       d_year = 1998 
+ group by i_item_id
+ order by i_item_id
+ limit 100;
+
+-- end query 1 in stream 0 using template query26.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query28.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query28.sql
new file mode 100644
index 0000000..e654cd0
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query28.sql
@@ -0,0 +1,53 @@
+-- start query 1 in stream 0 using template query28.tpl
+select  *
+from (select avg(ss_list_price) B1_LP
+            ,count(ss_list_price) B1_CNT
+            ,count(distinct ss_list_price) B1_CNTD
+      from store_sales
+      where ss_quantity between 0 and 5
+        and (ss_list_price between 11 and 11+10 
+             or ss_coupon_amt between 460 and 460+1000
+             or ss_wholesale_cost between 14 and 14+20)) B1,
+     (select avg(ss_list_price) B2_LP
+            ,count(ss_list_price) B2_CNT
+            ,count(distinct ss_list_price) B2_CNTD
+      from store_sales
+      where ss_quantity between 6 and 10
+        and (ss_list_price between 91 and 91+10
+          or ss_coupon_amt between 1430 and 1430+1000
+          or ss_wholesale_cost between 32 and 32+20)) B2,
+     (select avg(ss_list_price) B3_LP
+            ,count(ss_list_price) B3_CNT
+            ,count(distinct ss_list_price) B3_CNTD
+      from store_sales
+      where ss_quantity between 11 and 15
+        and (ss_list_price between 66 and 66+10
+          or ss_coupon_amt between 920 and 920+1000
+          or ss_wholesale_cost between 4 and 4+20)) B3,
+     (select avg(ss_list_price) B4_LP
+            ,count(ss_list_price) B4_CNT
+            ,count(distinct ss_list_price) B4_CNTD
+      from store_sales
+      where ss_quantity between 16 and 20
+        and (ss_list_price between 142 and 142+10
+          or ss_coupon_amt between 3054 and 3054+1000
+          or ss_wholesale_cost between 80 and 80+20)) B4,
+     (select avg(ss_list_price) B5_LP
+            ,count(ss_list_price) B5_CNT
+            ,count(distinct ss_list_price) B5_CNTD
+      from store_sales
+      where ss_quantity between 21 and 25
+        and (ss_list_price between 135 and 135+10
+          or ss_coupon_amt between 14180 and 14180+1000
+          or ss_wholesale_cost between 38 and 38+20)) B5,
+     (select avg(ss_list_price) B6_LP
+            ,count(ss_list_price) B6_CNT
+            ,count(distinct ss_list_price) B6_CNTD
+      from store_sales
+      where ss_quantity between 26 and 30
+        and (ss_list_price between 28 and 28+10
+          or ss_coupon_amt between 2513 and 2513+1000
+          or ss_wholesale_cost between 42 and 42+20)) B6
+limit 100;
+
+-- end query 1 in stream 0 using template query28.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query29.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query29.sql
new file mode 100644
index 0000000..bbe5922
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query29.sql
@@ -0,0 +1,47 @@
+-- start query 1 in stream 0 using template query29.tpl
+select   
+     i_item_id
+    ,i_item_desc
+    ,s_store_id
+    ,s_store_name
+    ,sum(ss_quantity)        as store_sales_quantity
+    ,sum(sr_return_quantity) as store_returns_quantity
+    ,sum(cs_quantity)        as catalog_sales_quantity
+ from
+    store_sales
+   ,store_returns
+   ,catalog_sales
+   ,date_dim             d1
+   ,date_dim             d2
+   ,date_dim             d3
+   ,store
+   ,item
+ where
+     d1.d_moy               = 4 
+ and d1.d_year              = 1999
+ and d1.d_date_sk           = ss_sold_date_sk
+ and i_item_sk              = ss_item_sk
+ and s_store_sk             = ss_store_sk
+ and ss_customer_sk         = sr_customer_sk
+ and ss_item_sk             = sr_item_sk
+ and ss_ticket_number       = sr_ticket_number
+ and sr_returned_date_sk    = d2.d_date_sk
+ and d2.d_moy               between 4 and  4 + 3 
+ and d2.d_year              = 1999
+ and sr_customer_sk         = cs_bill_customer_sk
+ and sr_item_sk             = cs_item_sk
+ and cs_sold_date_sk        = d3.d_date_sk     
+ and d3.d_year              in (1999,1999+1,1999+2)
+ group by
+    i_item_id
+   ,i_item_desc
+   ,s_store_id
+   ,s_store_name
+ order by
+    i_item_id 
+   ,i_item_desc
+   ,s_store_id
+   ,s_store_name
+ limit 100;
+
+-- end query 1 in stream 0 using template query29.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query3.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query3.sql
new file mode 100644
index 0000000..3bd370d
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query3.sql
@@ -0,0 +1,21 @@
+-- start query 1 in stream 0 using template query3.tpl
+select  dt.d_year 
+       ,item.i_brand_id brand_id 
+       ,item.i_brand brand
+       ,sum(ss_ext_sales_price) sum_agg
+ from  date_dim dt 
+      ,store_sales
+      ,item
+ where dt.d_date_sk = store_sales.ss_sold_date_sk
+   and store_sales.ss_item_sk = item.i_item_sk
+   and item.i_manufact_id = 436
+   and dt.d_moy=12
+ group by dt.d_year
+      ,item.i_brand
+      ,item.i_brand_id
+ order by dt.d_year
+         ,sum_agg desc
+         ,brand_id
+ limit 100;
+
+-- end query 1 in stream 0 using template query3.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query30.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query30.sql
new file mode 100644
index 0000000..1057157
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query30.sql
@@ -0,0 +1,31 @@
+-- start query 1 in stream 0 using template query30.tpl
+with customer_total_return as
+ (select wr_returning_customer_sk as ctr_customer_sk
+        ,ca_state as ctr_state, 
+ 	sum(wr_return_amt) as ctr_total_return
+ from web_returns
+     ,date_dim
+     ,customer_address
+ where wr_returned_date_sk = d_date_sk 
+   and d_year =2002
+   and wr_returning_addr_sk = ca_address_sk 
+ group by wr_returning_customer_sk
+         ,ca_state)
+  select  c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+       ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+       ,c_last_review_date,ctr_total_return
+ from customer_total_return ctr1
+     ,customer_address
+     ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+ 			  from customer_total_return ctr2 
+                  	  where ctr1.ctr_state = ctr2.ctr_state)
+       and ca_address_sk = c_current_addr_sk
+       and ca_state = 'IL'
+       and ctr1.ctr_customer_sk = c_customer_sk
+ order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
+                  ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
+                  ,c_last_review_date,ctr_total_return
+limit 100;
+
+-- end query 1 in stream 0 using template query30.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query31.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query31.sql
new file mode 100644
index 0000000..abbafe1
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query31.sql
@@ -0,0 +1,52 @@
+-- start query 1 in stream 0 using template query31.tpl
+with ss as
+ (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales
+ from store_sales,date_dim,customer_address
+ where ss_sold_date_sk = d_date_sk
+  and ss_addr_sk=ca_address_sk
+ group by ca_county,d_qoy, d_year),
+ ws as
+ (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales
+ from web_sales,date_dim,customer_address
+ where ws_sold_date_sk = d_date_sk
+  and ws_bill_addr_sk=ca_address_sk
+ group by ca_county,d_qoy, d_year)
+ select 
+        ss1.ca_county
+       ,ss1.d_year
+       ,ws2.web_sales/ws1.web_sales web_q1_q2_increase
+       ,ss2.store_sales/ss1.store_sales store_q1_q2_increase
+       ,ws3.web_sales/ws2.web_sales web_q2_q3_increase
+       ,ss3.store_sales/ss2.store_sales store_q2_q3_increase
+ from
+        ss ss1
+       ,ss ss2
+       ,ss ss3
+       ,ws ws1
+       ,ws ws2
+       ,ws ws3
+ where
+    ss1.d_qoy = 1
+    and ss1.d_year = 2000
+    and ss1.ca_county = ss2.ca_county
+    and ss2.d_qoy = 2
+    and ss2.d_year = 2000
+ and ss2.ca_county = ss3.ca_county
+    and ss3.d_qoy = 3
+    and ss3.d_year = 2000
+    and ss1.ca_county = ws1.ca_county
+    and ws1.d_qoy = 1
+    and ws1.d_year = 2000
+    and ws1.ca_county = ws2.ca_county
+    and ws2.d_qoy = 2
+    and ws2.d_year = 2000
+    and ws1.ca_county = ws3.ca_county
+    and ws3.d_qoy = 3
+    and ws3.d_year =2000
+    and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end 
+       > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end
+    and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end
+       > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end
+ order by ss1.d_year;
+
+-- end query 1 in stream 0 using template query31.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query32.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query32.sql
new file mode 100644
index 0000000..0ac08b3
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query32.sql
@@ -0,0 +1,28 @@
+-- start query 1 in stream 0 using template query32.tpl
+select  sum(cs_ext_discount_amt)  as "excess discount amount" 
+from 
+   catalog_sales 
+   ,item 
+   ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = cs_item_sk 
+and cast(d_date as timestamp) between cast('1998-03-18' as timestamp) and
+        (cast('1998-03-18' as timestamp) + interval 90 days)
+and d_date_sk = cs_sold_date_sk 
+and cs_ext_discount_amt  
+     > ( 
+         select 
+            1.3 * avg(cs_ext_discount_amt) 
+         from 
+            catalog_sales 
+           ,date_dim
+         where 
+              cs_item_sk = i_item_sk 
+          and cast(d_date as timestamp) between cast('1998-03-18' as timestamp) and
+                             (cast('1998-03-18' as timestamp) + interval 90 days)
+          and d_date_sk = cs_sold_date_sk 
+      ) 
+limit 100;
+
+-- end query 1 in stream 0 using template query32.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query33.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query33.sql
new file mode 100644
index 0000000..a18c9c9
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query33.sql
@@ -0,0 +1,75 @@
+-- start query 1 in stream 0 using template query33.tpl
+with ss as (
+ select
+          i_manufact_id,sum(ss_ext_sales_price) total_sales
+ from
+ 	store_sales,
+ 	date_dim,
+         customer_address,
+         item
+ where
+         i_manufact_id in (select
+  i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and     ss_item_sk              = i_item_sk
+ and     ss_sold_date_sk         = d_date_sk
+ and     d_year                  = 1999
+ and     d_moy                   = 3
+ and     ss_addr_sk              = ca_address_sk
+ and     ca_gmt_offset           = -6 
+ group by i_manufact_id),
+ cs as (
+ select
+          i_manufact_id,sum(cs_ext_sales_price) total_sales
+ from
+ 	catalog_sales,
+ 	date_dim,
+         customer_address,
+         item
+ where
+         i_manufact_id               in (select
+  i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and     cs_item_sk              = i_item_sk
+ and     cs_sold_date_sk         = d_date_sk
+ and     d_year                  = 1999
+ and     d_moy                   = 3
+ and     cs_bill_addr_sk         = ca_address_sk
+ and     ca_gmt_offset           = -6 
+ group by i_manufact_id),
+ ws as (
+ select
+          i_manufact_id,sum(ws_ext_sales_price) total_sales
+ from
+ 	web_sales,
+ 	date_dim,
+         customer_address,
+         item
+ where
+         i_manufact_id               in (select
+  i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and     ws_item_sk              = i_item_sk
+ and     ws_sold_date_sk         = d_date_sk
+ and     d_year                  = 1999
+ and     d_moy                   = 3
+ and     ws_bill_addr_sk         = ca_address_sk
+ and     ca_gmt_offset           = -6
+ group by i_manufact_id)
+  select  i_manufact_id ,sum(total_sales) total_sales
+ from  (select * from ss 
+        union all
+        select * from cs 
+        union all
+        select * from ws) tmp1
+ group by i_manufact_id
+ order by total_sales
+limit 100;
+
+-- end query 1 in stream 0 using template query33.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query34.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query34.sql
new file mode 100644
index 0000000..b92b8b1
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query34.sql
@@ -0,0 +1,31 @@
+-- start query 1 in stream 0 using template query34.tpl
+select c_last_name
+       ,c_first_name
+       ,c_salutation
+       ,c_preferred_cust_flag
+       ,ss_ticket_number
+       ,cnt from
+   (select ss_ticket_number
+          ,ss_customer_sk
+          ,count(*) cnt
+    from store_sales,date_dim,store,household_demographics
+    where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+    and store_sales.ss_store_sk = store.s_store_sk  
+    and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+    and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28)
+    and (household_demographics.hd_buy_potential = '1001-5000' or
+         household_demographics.hd_buy_potential = '5001-10000')
+    and household_demographics.hd_vehicle_count > 0
+    and (case when household_demographics.hd_vehicle_count > 0 
+	then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count 
+	else null 
+	end)  > 1.2
+    and date_dim.d_year in (1998,1998+1,1998+2)
+    and store.s_county in ('Kittitas County','Adams County','Richland County','Furnas County',
+                           'Orange County','Appanoose County','Franklin Parish','Tehama County')
+    group by ss_ticket_number,ss_customer_sk) dn,customer
+    where ss_customer_sk = c_customer_sk
+      and cnt between 15 and 20
+    order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number;
+
+-- end query 1 in stream 0 using template query34.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query37.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query37.sql
new file mode 100644
index 0000000..97bbed1
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query37.sql
@@ -0,0 +1,17 @@
+-- start query 1 in stream 0 using template query37.tpl
+select  i_item_id
+       ,i_item_desc
+       ,i_current_price
+ from item, inventory, date_dim, catalog_sales
+ where i_current_price between 22 and 22 + 30
+ and inv_item_sk = i_item_sk
+ and d_date_sk=inv_date_sk
+ and cast(d_date as timestamp) between cast('2001-06-02' as timestamp) and (cast('2001-06-02' as timestamp) + interval 60 days)
+ and i_manufact_id in (678,964,918,849)
+ and inv_quantity_on_hand between 100 and 500
+ and cs_item_sk = i_item_sk
+ group by i_item_id,i_item_desc,i_current_price
+ order by i_item_id
+ limit 100;
+
+-- end query 1 in stream 0 using template query37.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query39.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query39.sql
new file mode 100644
index 0000000..73d4645
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query39.sql
@@ -0,0 +1,54 @@
+-- start query 1 in stream 0 using template query39.tpl
+with inv as
+(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+       ,stdev,mean, case mean when 0 then null else stdev/mean end cov
+ from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+            ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean
+      from inventory
+          ,item
+          ,warehouse
+          ,date_dim
+      where inv_item_sk = i_item_sk
+        and inv_warehouse_sk = w_warehouse_sk
+        and inv_date_sk = d_date_sk
+        and d_year =1998
+      group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
+ where case mean when 0 then 0 else stdev/mean end > 1)
+select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
+        ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
+from inv inv1,inv inv2
+where inv1.i_item_sk = inv2.i_item_sk
+  and inv1.w_warehouse_sk =  inv2.w_warehouse_sk
+  and inv1.d_moy=4
+  and inv2.d_moy=4+1
+order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
+        ,inv2.d_moy,inv2.mean, inv2.cov
+;
+with inv as
+(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+       ,stdev,mean, case mean when 0 then null else stdev/mean end cov
+ from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
+            ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean
+      from inventory
+          ,item
+          ,warehouse
+          ,date_dim
+      where inv_item_sk = i_item_sk
+        and inv_warehouse_sk = w_warehouse_sk
+        and inv_date_sk = d_date_sk
+        and d_year =1998
+      group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
+ where case mean when 0 then 0 else stdev/mean end > 1)
+select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
+        ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
+from inv inv1,inv inv2
+where inv1.i_item_sk = inv2.i_item_sk
+  and inv1.w_warehouse_sk =  inv2.w_warehouse_sk
+  and inv1.d_moy=4
+  and inv2.d_moy=4+1
+  and inv1.cov > 1.5
+order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
+        ,inv2.d_moy,inv2.mean, inv2.cov
+;
+
+-- end query 1 in stream 0 using template query39.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query4.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query4.sql
new file mode 100644
index 0000000..ecbe6c4
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query4.sql
@@ -0,0 +1,116 @@
+-- start query 1 in stream 0 using template query4.tpl
+with year_total as (
+ select c_customer_id customer_id
+       ,c_first_name customer_first_name
+       ,c_last_name customer_last_name
+       ,c_preferred_cust_flag customer_preferred_cust_flag
+       ,c_birth_country customer_birth_country
+       ,c_login customer_login
+       ,c_email_address customer_email_address
+       ,d_year dyear
+       ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total
+       ,'s' sale_type
+ from customer
+     ,store_sales
+     ,date_dim
+ where c_customer_sk = ss_customer_sk
+   and ss_sold_date_sk = d_date_sk
+ group by c_customer_id
+         ,c_first_name
+         ,c_last_name
+         ,c_preferred_cust_flag
+         ,c_birth_country
+         ,c_login
+         ,c_email_address
+         ,d_year
+ union all
+ select c_customer_id customer_id
+       ,c_first_name customer_first_name
+       ,c_last_name customer_last_name
+       ,c_preferred_cust_flag customer_preferred_cust_flag
+       ,c_birth_country customer_birth_country
+       ,c_login customer_login
+       ,c_email_address customer_email_address
+       ,d_year dyear
+       ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total
+       ,'c' sale_type
+ from customer
+     ,catalog_sales
+     ,date_dim
+ where c_customer_sk = cs_bill_customer_sk
+   and cs_sold_date_sk = d_date_sk
+ group by c_customer_id
+         ,c_first_name
+         ,c_last_name
+         ,c_preferred_cust_flag
+         ,c_birth_country
+         ,c_login
+         ,c_email_address
+         ,d_year
+union all
+ select c_customer_id customer_id
+       ,c_first_name customer_first_name
+       ,c_last_name customer_last_name
+       ,c_preferred_cust_flag customer_preferred_cust_flag
+       ,c_birth_country customer_birth_country
+       ,c_login customer_login
+       ,c_email_address customer_email_address
+       ,d_year dyear
+       ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total
+       ,'w' sale_type
+ from customer
+     ,web_sales
+     ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+   and ws_sold_date_sk = d_date_sk
+ group by c_customer_id
+         ,c_first_name
+         ,c_last_name
+         ,c_preferred_cust_flag
+         ,c_birth_country
+         ,c_login
+         ,c_email_address
+         ,d_year
+         )
+  select  
+                  t_s_secyear.customer_id
+                 ,t_s_secyear.customer_first_name
+                 ,t_s_secyear.customer_last_name
+                 ,t_s_secyear.customer_email_address
+ from year_total t_s_firstyear
+     ,year_total t_s_secyear
+     ,year_total t_c_firstyear
+     ,year_total t_c_secyear
+     ,year_total t_w_firstyear
+     ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+   and t_s_firstyear.customer_id = t_c_secyear.customer_id
+   and t_s_firstyear.customer_id = t_c_firstyear.customer_id
+   and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+   and t_s_firstyear.customer_id = t_w_secyear.customer_id
+   and t_s_firstyear.sale_type = 's'
+   and t_c_firstyear.sale_type = 'c'
+   and t_w_firstyear.sale_type = 'w'
+   and t_s_secyear.sale_type = 's'
+   and t_c_secyear.sale_type = 'c'
+   and t_w_secyear.sale_type = 'w'
+   and t_s_firstyear.dyear =  2001
+   and t_s_secyear.dyear = 2001+1
+   and t_c_firstyear.dyear =  2001
+   and t_c_secyear.dyear =  2001+1
+   and t_w_firstyear.dyear = 2001
+   and t_w_secyear.dyear = 2001+1
+   and t_s_firstyear.year_total > 0
+   and t_c_firstyear.year_total > 0
+   and t_w_firstyear.year_total > 0
+   and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end
+           > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end
+   and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end
+           > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end
+ order by t_s_secyear.customer_id
+         ,t_s_secyear.customer_first_name
+         ,t_s_secyear.customer_last_name
+         ,t_s_secyear.customer_email_address
+limit 100;
+
+-- end query 1 in stream 0 using template query4.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query40.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query40.sql
new file mode 100644
index 0000000..e44e732
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query40.sql
@@ -0,0 +1,28 @@
+-- start query 1 in stream 0 using template query40.tpl
+select  
+   w_state
+  ,i_item_id
+  ,sum(case when (cast(d_date as timestamp) < cast ('1998-04-08' as timestamp)) 
+ 		then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before
+  ,sum(case when (cast(d_date as timestamp) >= cast ('1998-04-08' as timestamp)) 
+ 		then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after
+ from
+   catalog_sales left outer join catalog_returns on
+       (cs_order_number = cr_order_number 
+        and cs_item_sk = cr_item_sk)
+  ,warehouse 
+  ,item
+  ,date_dim
+ where
+     i_current_price between 0.99 and 1.49
+ and i_item_sk          = cs_item_sk
+ and cs_warehouse_sk    = w_warehouse_sk 
+ and cs_sold_date_sk    = d_date_sk
+ and cast(d_date as timestamp) between (cast ('1998-04-08' as timestamp) - interval 30 days)
+                and (cast ('1998-04-08' as timestamp) + interval 30 days) 
+ group by
+    w_state,i_item_id
+ order by w_state,i_item_id
+limit 100;
+
+-- end query 1 in stream 0 using template query40.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query42.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query42.sql
new file mode 100644
index 0000000..eea2b5c
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query42.sql
@@ -0,0 +1,22 @@
+-- start query 1 in stream 0 using template query42.tpl
+select  dt.d_year
+ 	,item.i_category_id
+ 	,item.i_category
+ 	,sum(ss_ext_sales_price)
+ from 	date_dim dt
+ 	,store_sales
+ 	,item
+ where dt.d_date_sk = store_sales.ss_sold_date_sk
+ 	and store_sales.ss_item_sk = item.i_item_sk
+ 	and item.i_manager_id = 1  	
+ 	and dt.d_moy=12
+ 	and dt.d_year=1998
+ group by 	dt.d_year
+ 		,item.i_category_id
+ 		,item.i_category
+ order by       sum(ss_ext_sales_price) desc,dt.d_year
+ 		,item.i_category_id
+ 		,item.i_category
+limit 100 ;
+
+-- end query 1 in stream 0 using template query42.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query43.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query43.sql
new file mode 100644
index 0000000..7dddfd8
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query43.sql
@@ -0,0 +1,19 @@
+-- start query 1 in stream 0 using template query43.tpl
+select  s_store_name, s_store_id,
+        sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales,
+        sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales,
+        sum(case when (d_day_name='Tuesday') then ss_sales_price else  null end) tue_sales,
+        sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales,
+        sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales,
+        sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales,
+        sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales
+ from date_dim, store_sales, store
+ where d_date_sk = ss_sold_date_sk and
+       s_store_sk = ss_store_sk and
+       s_gmt_offset = -6 and
+       d_year = 1998 
+ group by s_store_name, s_store_id
+ order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales
+ limit 100;
+
+-- end query 1 in stream 0 using template query43.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query46.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query46.sql
new file mode 100644
index 0000000..552ffb8
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query46.sql
@@ -0,0 +1,35 @@
+-- start query 1 in stream 0 using template query46.tpl
+select  c_last_name
+       ,c_first_name
+       ,ca_city
+       ,bought_city
+       ,ss_ticket_number
+       ,amt,profit 
+ from
+   (select ss_ticket_number
+          ,ss_customer_sk
+          ,ca_city bought_city
+          ,sum(ss_coupon_amt) amt
+          ,sum(ss_net_profit) profit
+    from store_sales,date_dim,store,household_demographics,customer_address 
+    where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+    and store_sales.ss_store_sk = store.s_store_sk  
+    and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+    and store_sales.ss_addr_sk = customer_address.ca_address_sk
+    and (household_demographics.hd_dep_count = 4 or
+         household_demographics.hd_vehicle_count= 2)
+    and date_dim.d_dow in (6,0)
+    and date_dim.d_year in (1998,1998+1,1998+2) 
+    and store.s_city in ('Rosedale','Bethlehem','Clinton','Clifton','Springfield') 
+    group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr
+    where ss_customer_sk = c_customer_sk
+      and customer.c_current_addr_sk = current_addr.ca_address_sk
+      and current_addr.ca_city <> bought_city
+  order by c_last_name
+          ,c_first_name
+          ,ca_city
+          ,bought_city
+          ,ss_ticket_number
+  limit 100;
+
+-- end query 1 in stream 0 using template query46.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query47.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query47.sql
new file mode 100644
index 0000000..e85a20d
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query47.sql
@@ -0,0 +1,51 @@
+-- start query 1 in stream 0 using template query47.tpl
+with v1 as(
+ select i_category, i_brand,
+        s_store_name, s_company_name,
+        d_year, d_moy,
+        sum(ss_sales_price) sum_sales,
+        avg(sum(ss_sales_price)) over
+          (partition by i_category, i_brand,
+                     s_store_name, s_company_name, d_year)
+          avg_monthly_sales,
+        rank() over
+          (partition by i_category, i_brand,
+                     s_store_name, s_company_name
+           order by d_year, d_moy) rn
+ from item, store_sales, date_dim, store
+ where ss_item_sk = i_item_sk and
+       ss_sold_date_sk = d_date_sk and
+       ss_store_sk = s_store_sk and
+       (
+         d_year = 2000 or
+         ( d_year = 2000-1 and d_moy =12) or
+         ( d_year = 2000+1 and d_moy =1)
+       )
+ group by i_category, i_brand,
+          s_store_name, s_company_name,
+          d_year, d_moy),
+ v2 as(
+ select v1.i_category
+        ,v1.d_year, v1.d_moy
+        ,v1.avg_monthly_sales
+        ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+       v1.i_category = v1_lead.i_category and
+       v1.i_brand = v1_lag.i_brand and
+       v1.i_brand = v1_lead.i_brand and
+       v1.s_store_name = v1_lag.s_store_name and
+       v1.s_store_name = v1_lead.s_store_name and
+       v1.s_company_name = v1_lag.s_company_name and
+       v1.s_company_name = v1_lead.s_company_name and
+       v1.rn = v1_lag.rn + 1 and
+       v1.rn = v1_lead.rn - 1)
+  select  *
+ from v2
+ where  d_year = 2000 and    
+        avg_monthly_sales > 0 and
+        case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100;
+
+-- end query 1 in stream 0 using template query47.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query48.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query48.sql
new file mode 100644
index 0000000..f1c5e87
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query48.sql
@@ -0,0 +1,67 @@
+-- start query 1 in stream 0 using template query48.tpl
+select sum (ss_quantity)
+ from store_sales, store, customer_demographics, customer_address, date_dim
+ where s_store_sk = ss_store_sk
+ and  ss_sold_date_sk = d_date_sk and d_year = 1998
+ and  
+ (
+  (
+   cd_demo_sk = ss_cdemo_sk
+   and 
+   cd_marital_status = 'M'
+   and 
+   cd_education_status = '4 yr Degree'
+   and 
+   ss_sales_price between 100.00 and 150.00  
+   )
+ or
+  (
+  cd_demo_sk = ss_cdemo_sk
+   and 
+   cd_marital_status = 'D'
+   and 
+   cd_education_status = 'Primary'
+   and 
+   ss_sales_price between 50.00 and 100.00   
+  )
+ or 
+ (
+  cd_demo_sk = ss_cdemo_sk
+  and 
+   cd_marital_status = 'U'
+   and 
+   cd_education_status = 'Advanced Degree'
+   and 
+   ss_sales_price between 150.00 and 200.00  
+ )
+ )
+ and
+ (
+  (
+  ss_addr_sk = ca_address_sk
+  and
+  ca_country = 'United States'
+  and
+  ca_state in ('KY', 'GA', 'NM')
+  and ss_net_profit between 0 and 2000  
+  )
+ or
+  (ss_addr_sk = ca_address_sk
+  and
+  ca_country = 'United States'
+  and
+  ca_state in ('MT', 'OR', 'IN')
+  and ss_net_profit between 150 and 3000 
+  )
+ or
+  (ss_addr_sk = ca_address_sk
+  and
+  ca_country = 'United States'
+  and
+  ca_state in ('WI', 'MO', 'WV')
+  and ss_net_profit between 50 and 25000 
+  )
+ )
+;
+
+-- end query 1 in stream 0 using template query48.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query49.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query49.sql
new file mode 100644
index 0000000..4f7ffaa
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query49.sql
@@ -0,0 +1,127 @@
+-- start query 1 in stream 0 using template query49.tpl
+select  
+ 'web' as channel
+ ,web.item
+ ,web.return_ratio
+ ,web.return_rank
+ ,web.currency_rank
+ from (
+ 	select 
+ 	 item
+ 	,return_ratio
+ 	,currency_ratio
+ 	,rank() over (order by return_ratio) as return_rank
+ 	,rank() over (order by currency_ratio) as currency_rank
+ 	from
+ 	(	select ws.ws_item_sk as item
+ 		,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/
+ 		cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio
+ 		,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/
+ 		cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio
+ 		from 
+ 		 web_sales ws left outer join web_returns wr 
+ 			on (ws.ws_order_number = wr.wr_order_number and 
+ 			ws.ws_item_sk = wr.wr_item_sk)
+                 ,date_dim
+ 		where 
+ 			wr.wr_return_amt > 10000 
+ 			and ws.ws_net_profit > 1
+                         and ws.ws_net_paid > 0
+                         and ws.ws_quantity > 0
+                         and ws_sold_date_sk = d_date_sk
+                         and d_year = 2000
+                         and d_moy = 12
+ 		group by ws.ws_item_sk
+ 	) in_web
+ ) web
+ where 
+ (
+ web.return_rank <= 10
+ or
+ web.currency_rank <= 10
+ )
+ union
+ select 
+ 'catalog' as channel
+ ,catalog.item
+ ,catalog.return_ratio
+ ,catalog.return_rank
+ ,catalog.currency_rank
+ from (
+ 	select 
+ 	 item
+ 	,return_ratio
+ 	,currency_ratio
+ 	,rank() over (order by return_ratio) as return_rank
+ 	,rank() over (order by currency_ratio) as currency_rank
+ 	from
+ 	(	select 
+ 		cs.cs_item_sk as item
+ 		,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/
+ 		cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio
+ 		,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/
+ 		cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio
+ 		from 
+ 		catalog_sales cs left outer join catalog_returns cr
+ 			on (cs.cs_order_number = cr.cr_order_number and 
+ 			cs.cs_item_sk = cr.cr_item_sk)
+                ,date_dim
+ 		where 
+ 			cr.cr_return_amount > 10000 
+ 			and cs.cs_net_profit > 1
+                         and cs.cs_net_paid > 0
+                         and cs.cs_quantity > 0
+                         and cs_sold_date_sk = d_date_sk
+                         and d_year = 2000
+                         and d_moy = 12
+                 group by cs.cs_item_sk
+ 	) in_cat
+ ) catalog
+ where 
+ (
+ catalog.return_rank <= 10
+ or
+ catalog.currency_rank <=10
+ )
+ union
+ select 
+ 'store' as channel
+ ,store.item
+ ,store.return_ratio
+ ,store.return_rank
+ ,store.currency_rank
+ from (
+ 	select 
+ 	 item
+ 	,return_ratio
+ 	,currency_ratio
+ 	,rank() over (order by return_ratio) as return_rank
+ 	,rank() over (order by currency_ratio) as currency_rank
+ 	from
+ 	(	select sts.ss_item_sk as item
+ 		,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio
+ 		,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio
+ 		from 
+ 		store_sales sts left outer join store_returns sr
+ 			on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk)
+                ,date_dim
+ 		where 
+ 			sr.sr_return_amt > 10000 
+ 			and sts.ss_net_profit > 1
+                         and sts.ss_net_paid > 0 
+                         and sts.ss_quantity > 0
+                         and ss_sold_date_sk = d_date_sk
+                         and d_year = 2000
+                         and d_moy = 12
+ 		group by sts.ss_item_sk
+ 	) in_store
+ ) store
+ where  (
+ store.return_rank <= 10
+ or 
+ store.currency_rank <= 10
+ )
+ order by 1,4,5,2
+ limit 100;
+
+-- end query 1 in stream 0 using template query49.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query50.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query50.sql
new file mode 100644
index 0000000..04ff0cc
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query50.sql
@@ -0,0 +1,59 @@
+-- start query 1 in stream 0 using template query50.tpl
+select  
+   s_store_name
+  ,s_company_id
+  ,s_street_number
+  ,s_street_name
+  ,s_street_type
+  ,s_suite_number
+  ,s_city
+  ,s_county
+  ,s_state
+  ,s_zip
+  ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end)  as "30 days" 
+  ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and 
+                 (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end )  as "31-60 days" 
+  ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and 
+                 (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end)  as "61-90 days" 
+  ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and
+                 (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end)  as "91-120 days" 
+  ,sum(case when (sr_returned_date_sk - ss_sold_date_sk  > 120) then 1 else 0 end)  as ">120 days" 
+from
+   store_sales
+  ,store_returns
+  ,store
+  ,date_dim d1
+  ,date_dim d2
+where
+    d2.d_year = 2000
+and d2.d_moy  = 9
+and ss_ticket_number = sr_ticket_number
+and ss_item_sk = sr_item_sk
+and ss_sold_date_sk   = d1.d_date_sk
+and sr_returned_date_sk   = d2.d_date_sk
+and ss_customer_sk = sr_customer_sk
+and ss_store_sk = s_store_sk
+group by
+   s_store_name
+  ,s_company_id
+  ,s_street_number
+  ,s_street_name
+  ,s_street_type
+  ,s_suite_number
+  ,s_city
+  ,s_county
+  ,s_state
+  ,s_zip
+order by s_store_name
+        ,s_company_id
+        ,s_street_number
+        ,s_street_name
+        ,s_street_type
+        ,s_suite_number
+        ,s_city
+        ,s_county
+        ,s_state
+        ,s_zip
+limit 100;
+
+-- end query 1 in stream 0 using template query50.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query51.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query51.sql
new file mode 100644
index 0000000..35916ef
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query51.sql
@@ -0,0 +1,45 @@
+-- start query 1 in stream 0 using template query51.tpl
+WITH web_v1 as (
+select
+  ws_item_sk item_sk, d_date,
+  sum(sum(ws_sales_price))
+      over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales
+from web_sales
+    ,date_dim
+where ws_sold_date_sk=d_date_sk
+  and d_month_seq between 1212 and 1212+11
+  and ws_item_sk is not NULL
+group by ws_item_sk, d_date),
+store_v1 as (
+select
+  ss_item_sk item_sk, d_date,
+  sum(sum(ss_sales_price))
+      over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales
+from store_sales
+    ,date_dim
+where ss_sold_date_sk=d_date_sk
+  and d_month_seq between 1212 and 1212+11
+  and ss_item_sk is not NULL
+group by ss_item_sk, d_date)
+ select  *
+from (select item_sk
+     ,d_date
+     ,web_sales
+     ,store_sales
+     ,max(web_sales)
+         over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative
+     ,max(store_sales)
+         over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative
+     from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk
+                 ,case when web.d_date is not null then web.d_date else store.d_date end d_date
+                 ,web.cume_sales web_sales
+                 ,store.cume_sales store_sales
+           from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk
+                                                          and web.d_date = store.d_date)
+          )x )y
+where web_cumulative > store_cumulative
+order by item_sk
+        ,d_date
+limit 100;
+
+-- end query 1 in stream 0 using template query51.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query52.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query52.sql
new file mode 100644
index 0000000..27970b4
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query52.sql
@@ -0,0 +1,22 @@
+-- start query 1 in stream 0 using template query52.tpl
+select  dt.d_year
+ 	,item.i_brand_id brand_id
+ 	,item.i_brand brand
+ 	,sum(ss_ext_sales_price) ext_price
+ from date_dim dt
+     ,store_sales
+     ,item
+ where dt.d_date_sk = store_sales.ss_sold_date_sk
+    and store_sales.ss_item_sk = item.i_item_sk
+    and item.i_manager_id = 1
+    and dt.d_moy=12
+    and dt.d_year=1998
+ group by dt.d_year
+ 	,item.i_brand
+ 	,item.i_brand_id
+ order by dt.d_year
+ 	,ext_price desc
+ 	,brand_id
+limit 100;
+
+-- end query 1 in stream 0 using template query52.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query53.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query53.sql
new file mode 100644
index 0000000..23a6afe
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query53.sql
@@ -0,0 +1,28 @@
+-- start query 1 in stream 0 using template query53.tpl
+select  * from 
+(select i_manufact_id,
+sum(ss_sales_price) sum_sales,
+avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales
+from item, store_sales, date_dim, store
+where ss_item_sk = i_item_sk and
+ss_sold_date_sk = d_date_sk and
+ss_store_sk = s_store_sk and
+d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and
+((i_category in ('Books','Children','Electronics') and
+i_class in ('personal','portable','reference','self-help') and
+i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7',
+		'exportiunivamalg #9','scholaramalgamalg #9'))
+or(i_category in ('Women','Music','Men') and
+i_class in ('accessories','classical','fragrances','pants') and
+i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1',
+		'importoamalg #1')))
+group by i_manufact_id, d_qoy ) tmp1
+where case when avg_quarterly_sales > 0 
+	then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales 
+	else null end > 0.1
+order by avg_quarterly_sales,
+	 sum_sales,
+	 i_manufact_id
+limit 100;
+
+-- end query 1 in stream 0 using template query53.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query55.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query55.sql
new file mode 100644
index 0000000..2fbf996
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query55.sql
@@ -0,0 +1,14 @@
+-- start query 1 in stream 0 using template query55.tpl
+select  i_brand_id brand_id, i_brand brand,
+ 	sum(ss_ext_sales_price) ext_price
+ from date_dim, store_sales, item
+ where d_date_sk = ss_sold_date_sk
+ 	and ss_item_sk = i_item_sk
+ 	and i_manager_id=36
+ 	and d_moy=12
+ 	and d_year=2001
+ group by i_brand, i_brand_id
+ order by ext_price desc, i_brand_id
+limit 100;
+
+-- end query 1 in stream 0 using template query55.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query56.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query56.sql
new file mode 100644
index 0000000..9b6ce02
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query56.sql
@@ -0,0 +1,69 @@
+-- start query 1 in stream 0 using template query56.tpl
+with ss as (
+ select i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ 	store_sales,
+ 	date_dim,
+         customer_address,
+         item
+ where i_item_id in (select
+     i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and     ss_item_sk              = i_item_sk
+ and     ss_sold_date_sk         = d_date_sk
+ and     d_year                  = 2000
+ and     d_moy                   = 1
+ and     ss_addr_sk              = ca_address_sk
+ and     ca_gmt_offset           = -8 
+ group by i_item_id),
+ cs as (
+ select i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ 	catalog_sales,
+ 	date_dim,
+         customer_address,
+         item
+ where
+         i_item_id               in (select
+  i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and     cs_item_sk              = i_item_sk
+ and     cs_sold_date_sk         = d_date_sk
+ and     d_year                  = 2000
+ and     d_moy                   = 1
+ and     cs_bill_addr_sk         = ca_address_sk
+ and     ca_gmt_offset           = -8 
+ group by i_item_id),
+ ws as (
+ select i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ 	web_sales,
+ 	date_dim,
+         customer_address,
+         item
+ where
+         i_item_id               in (select
+  i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and     ws_item_sk              = i_item_sk
+ and     ws_sold_date_sk         = d_date_sk
+ and     d_year                  = 2000
+ and     d_moy                   = 1
+ and     ws_bill_addr_sk         = ca_address_sk
+ and     ca_gmt_offset           = -8
+ group by i_item_id)
+  select  i_item_id ,sum(total_sales) total_sales
+ from  (select * from ss 
+        union all
+        select * from cs 
+        union all
+        select * from ws) tmp1
+ group by i_item_id
+ order by total_sales,
+          i_item_id
+ limit 100;
+
+-- end query 1 in stream 0 using template query56.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query57.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query57.sql
new file mode 100644
index 0000000..52dc0a2
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query57.sql
@@ -0,0 +1,48 @@
+-- start query 1 in stream 0 using template query57.tpl
+with v1 as(
+ select i_category, i_brand,
+        cc_name,
+        d_year, d_moy,
+        sum(cs_sales_price) sum_sales,
+        avg(sum(cs_sales_price)) over
+          (partition by i_category, i_brand,
+                     cc_name, d_year)
+          avg_monthly_sales,
+        rank() over
+          (partition by i_category, i_brand,
+                     cc_name
+           order by d_year, d_moy) rn
+ from item, catalog_sales, date_dim, call_center
+ where cs_item_sk = i_item_sk and
+       cs_sold_date_sk = d_date_sk and
+       cc_call_center_sk= cs_call_center_sk and
+       (
+         d_year = 2000 or
+         ( d_year = 2000-1 and d_moy =12) or
+         ( d_year = 2000+1 and d_moy =1)
+       )
+ group by i_category, i_brand,
+          cc_name , d_year, d_moy),
+ v2 as(
+ select v1.i_category, v1.i_brand
+        ,v1.d_year, v1.d_moy
+        ,v1.avg_monthly_sales
+        ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+       v1.i_category = v1_lead.i_category and
+       v1.i_brand = v1_lag.i_brand and
+       v1.i_brand = v1_lead.i_brand and
+       v1.cc_name = v1_lag.cc_name and
+       v1.cc_name = v1_lead.cc_name and
+       v1.rn = v1_lag.rn + 1 and
+       v1.rn = v1_lead.rn - 1)
+  select  *
+ from v2
+ where  d_year = 2000 and
+        avg_monthly_sales > 0 and
+        case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100;
+
+-- end query 1 in stream 0 using template query57.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query58.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query58.sql
new file mode 100644
index 0000000..6a2dd28
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query58.sql
@@ -0,0 +1,65 @@
+-- start query 1 in stream 0 using template query58.tpl
+with ss_items as
+ (select i_item_id item_id
+        ,sum(ss_ext_sales_price) ss_item_rev 
+ from store_sales
+     ,item
+     ,date_dim
+ where ss_item_sk = i_item_sk
+   and d_date in (select d_date
+                  from date_dim
+                  where d_week_seq = (select d_week_seq 
+                                      from date_dim
+                                      where d_date = '1998-02-19' limit 1))
+   and ss_sold_date_sk   = d_date_sk
+ group by i_item_id),
+ cs_items as
+ (select i_item_id item_id
+        ,sum(cs_ext_sales_price) cs_item_rev
+  from catalog_sales
+      ,item
+      ,date_dim
+ where cs_item_sk = i_item_sk
+  and  d_date in (select d_date
+                  from date_dim
+                  where d_week_seq = (select d_week_seq 
+                                      from date_dim
+                                      where d_date = '1998-02-19' limit 1))
+  and  cs_sold_date_sk = d_date_sk
+ group by i_item_id),
+ ws_items as
+ (select i_item_id item_id
+        ,sum(ws_ext_sales_price) ws_item_rev
+  from web_sales
+      ,item
+      ,date_dim
+ where ws_item_sk = i_item_sk
+  and  d_date in (select d_date
+                  from date_dim
+                  where d_week_seq =(select d_week_seq 
+                                     from date_dim
+                                     where d_date = '1998-02-19' limit 1))
+  and ws_sold_date_sk   = d_date_sk
+ group by i_item_id)
+  select  ss_items.item_id
+       ,ss_item_rev
+       ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev
+       ,cs_item_rev
+       ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev
+       ,ws_item_rev
+       ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev
+       ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average
+ from ss_items,cs_items,ws_items
+ where ss_items.item_id=cs_items.item_id
+   and ss_items.item_id=ws_items.item_id 
+   and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
+   and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
+   and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
+   and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
+   and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
+   and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
+ order by item_id
+         ,ss_item_rev
+ limit 100;
+
+-- end query 1 in stream 0 using template query58.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query59.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query59.sql
new file mode 100644
index 0000000..ee4ffeb
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query59.sql
@@ -0,0 +1,44 @@
+-- start query 1 in stream 0 using template query59.tpl
+with wss as 
+ (select d_week_seq,
+        ss_store_sk,
+        sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales,
+        sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales,
+        sum(case when (d_day_name='Tuesday') then ss_sales_price else  null end) tue_sales,
+        sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales,
+        sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales,
+        sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales,
+        sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales
+ from store_sales,date_dim
+ where d_date_sk = ss_sold_date_sk
+ group by d_week_seq,ss_store_sk
+ )
+  select  s_store_name1,s_store_id1,d_week_seq1
+       ,sun_sales1/sun_sales2,mon_sales1/mon_sales2
+       ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2
+       ,fri_sales1/fri_sales2,sat_sales1/sat_sales2
+ from
+ (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1
+        ,s_store_id s_store_id1,sun_sales sun_sales1
+        ,mon_sales mon_sales1,tue_sales tue_sales1
+        ,wed_sales wed_sales1,thu_sales thu_sales1
+        ,fri_sales fri_sales1,sat_sales sat_sales1
+  from wss,store,date_dim d
+  where d.d_week_seq = wss.d_week_seq and
+        ss_store_sk = s_store_sk and 
+        d_month_seq between 1185 and 1185 + 11) y,
+ (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2
+        ,s_store_id s_store_id2,sun_sales sun_sales2
+        ,mon_sales mon_sales2,tue_sales tue_sales2
+        ,wed_sales wed_sales2,thu_sales thu_sales2
+        ,fri_sales fri_sales2,sat_sales sat_sales2
+  from wss,store,date_dim d
+  where d.d_week_seq = wss.d_week_seq and
+        ss_store_sk = s_store_sk and 
+        d_month_seq between 1185 + 12 and 1185 + 23) x
+ where s_store_id1=s_store_id2
+   and d_week_seq1=d_week_seq2-52
+ order by s_store_name1,s_store_id1,d_week_seq1
+limit 100;
+
+-- end query 1 in stream 0 using template query59.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query6.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query6.sql
new file mode 100644
index 0000000..e04ff0e
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query6.sql
@@ -0,0 +1,26 @@
+-- start query 1 in stream 0 using template query6.tpl
+select  a.ca_state state, count(*) cnt
+ from customer_address a
+     ,customer c
+     ,store_sales s
+     ,date_dim d
+     ,item i
+ where       a.ca_address_sk = c.c_current_addr_sk
+ 	and c.c_customer_sk = s.ss_customer_sk
+ 	and s.ss_sold_date_sk = d.d_date_sk
+ 	and s.ss_item_sk = i.i_item_sk
+ 	and d.d_month_seq = 
+ 	     (select distinct (d_month_seq)
+ 	      from date_dim
+               where d_year = 2000
+ 	        and d_moy = 2 limit 1)
+ 	and i.i_current_price > 1.2 * 
+             (select avg(j.i_current_price) 
+ 	     from item j 
+ 	     where j.i_category = i.i_category)
+ group by a.ca_state
+ having count(*) >= 10
+ order by cnt, a.ca_state 
+ limit 100;
+
+-- end query 1 in stream 0 using template query6.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query60.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query60.sql
new file mode 100644
index 0000000..f7682bf
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query60.sql
@@ -0,0 +1,78 @@
+-- start query 1 in stream 0 using template query60.tpl
+with ss as (
+ select
+          i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ 	store_sales,
+ 	date_dim,
+         customer_address,
+         item
+ where
+         i_item_id in (select
+  i_item_id
+from
+ item
+where i_category in ('Children'))
+ and     ss_item_sk              = i_item_sk
+ and     ss_sold_date_sk         = d_date_sk
+ and     d_year                  = 1999
+ and     d_moy                   = 9
+ and     ss_addr_sk              = ca_address_sk
+ and     ca_gmt_offset           = -6 
+ group by i_item_id),
+ cs as (
+ select
+          i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ 	catalog_sales,
+ 	date_dim,
+         customer_address,
+         item
+ where
+         i_item_id               in (select
+  i_item_id
+from
+ item
+where i_category in ('Children'))
+ and     cs_item_sk              = i_item_sk
+ and     cs_sold_date_sk         = d_date_sk
+ and     d_year                  = 1999
+ and     d_moy                   = 9
+ and     cs_bill_addr_sk         = ca_address_sk
+ and     ca_gmt_offset           = -6 
+ group by i_item_id),
+ ws as (
+ select
+          i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ 	web_sales,
+ 	date_dim,
+         customer_address,
+         item
+ where
+         i_item_id               in (select
+  i_item_id
+from
+ item
+where i_category in ('Children'))
+ and     ws_item_sk              = i_item_sk
+ and     ws_sold_date_sk         = d_date_sk
+ and     d_year                  = 1999
+ and     d_moy                   = 9
+ and     ws_bill_addr_sk         = ca_address_sk
+ and     ca_gmt_offset           = -6
+ group by i_item_id)
+  select   
+  i_item_id
+,sum(total_sales) total_sales
+ from  (select * from ss 
+        union all
+        select * from cs 
+        union all
+        select * from ws) tmp1
+ group by i_item_id
+ order by i_item_id
+      ,total_sales
+ limit 100;
+
+-- end query 1 in stream 0 using template query60.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query61.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query61.sql
new file mode 100644
index 0000000..c3a4617
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query61.sql
@@ -0,0 +1,44 @@
+-- start query 1 in stream 0 using template query61.tpl
+select  promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100
+from
+  (select sum(ss_ext_sales_price) promotions
+   from  store_sales
+        ,store
+        ,promotion
+        ,date_dim
+        ,customer
+        ,customer_address 
+        ,item
+   where ss_sold_date_sk = d_date_sk
+   and   ss_store_sk = s_store_sk
+   and   ss_promo_sk = p_promo_sk
+   and   ss_customer_sk= c_customer_sk
+   and   ca_address_sk = c_current_addr_sk
+   and   ss_item_sk = i_item_sk 
+   and   ca_gmt_offset = -7
+   and   i_category = 'Books'
+   and   (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y')
+   and   s_gmt_offset = -7
+   and   d_year = 1999
+   and   d_moy  = 11) promotional_sales,
+  (select sum(ss_ext_sales_price) total
+   from  store_sales
+        ,store
+        ,date_dim
+        ,customer
+        ,customer_address
+        ,item
+   where ss_sold_date_sk = d_date_sk
+   and   ss_store_sk = s_store_sk
+   and   ss_customer_sk= c_customer_sk
+   and   ca_address_sk = c_current_addr_sk
+   and   ss_item_sk = i_item_sk
+   and   ca_gmt_offset = -7
+   and   i_category = 'Books'
+   and   s_gmt_offset = -7
+   and   d_year = 1999
+   and   d_moy  = 11) all_sales
+order by promotions, total
+limit 100;
+
+-- end query 1 in stream 0 using template query61.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query62.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query62.sql
new file mode 100644
index 0000000..11edf3c
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query62.sql
@@ -0,0 +1,35 @@
+-- start query 1 in stream 0 using template query62.tpl
+select  
+   substr(w_warehouse_name,1,20)
+  ,sm_type
+  ,web_name
+  ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end)  as "30 days" 
+  ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and 
+                 (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end )  as "31-60 days" 
+  ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and 
+                 (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end)  as "61-90 days" 
+  ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and
+                 (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end)  as "91-120 days" 
+  ,sum(case when (ws_ship_date_sk - ws_sold_date_sk  > 120) then 1 else 0 end)  as ">120 days" 
+from
+   web_sales
+  ,warehouse
+  ,ship_mode
+  ,web_site
+  ,date_dim
+where
+    d_month_seq between 1212 and 1212 + 11
+and ws_ship_date_sk   = d_date_sk
+and ws_warehouse_sk   = w_warehouse_sk
+and ws_ship_mode_sk   = sm_ship_mode_sk
+and ws_web_site_sk    = web_site_sk
+group by
+   substr(w_warehouse_name,1,20)
+  ,sm_type
+  ,web_name
+order by substr(w_warehouse_name,1,20)
+        ,sm_type
+        ,web_name
+limit 100;
+
+-- end query 1 in stream 0 using template query62.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query63.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query63.sql
new file mode 100644
index 0000000..e301d65
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query63.sql
@@ -0,0 +1,29 @@
+-- start query 1 in stream 0 using template query63.tpl
+select  * 
+from (select i_manager_id
+             ,sum(ss_sales_price) sum_sales
+             ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales
+      from item
+          ,store_sales
+          ,date_dim
+          ,store
+      where ss_item_sk = i_item_sk
+        and ss_sold_date_sk = d_date_sk
+        and ss_store_sk = s_store_sk
+        and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11)
+        and ((    i_category in ('Books','Children','Electronics')
+              and i_class in ('personal','portable','reference','self-help')
+              and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7',
+		                  'exportiunivamalg #9','scholaramalgamalg #9'))
+           or(    i_category in ('Women','Music','Men')
+              and i_class in ('accessories','classical','fragrances','pants')
+              and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1',
+		                 'importoamalg #1')))
+group by i_manager_id, d_moy) tmp1
+where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+order by i_manager_id
+        ,avg_monthly_sales
+        ,sum_sales
+limit 100;
+
+-- end query 1 in stream 0 using template query63.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query64.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query64.sql
new file mode 100644
index 0000000..e1f5fc6
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query64.sql
@@ -0,0 +1,120 @@
+-- start query 1 in stream 0 using template query64.tpl
+with cs_ui as
+ (select cs_item_sk
+        ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund
+  from catalog_sales
+      ,catalog_returns
+  where cs_item_sk = cr_item_sk
+    and cs_order_number = cr_order_number
+  group by cs_item_sk
+  having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)),
+cross_sales as
+ (select i_product_name product_name
+     ,i_item_sk item_sk
+     ,s_store_name store_name
+     ,s_zip store_zip
+     ,ad1.ca_street_number b_street_number
+     ,ad1.ca_street_name b_street_name
+     ,ad1.ca_city b_city
+     ,ad1.ca_zip b_zip
+     ,ad2.ca_street_number c_street_number
+     ,ad2.ca_street_name c_street_name
+     ,ad2.ca_city c_city
+     ,ad2.ca_zip c_zip
+     ,d1.d_year as syear
+     ,d2.d_year as fsyear
+     ,d3.d_year s2year
+     ,count(*) cnt
+     ,sum(ss_wholesale_cost) s1
+     ,sum(ss_list_price) s2
+     ,sum(ss_coupon_amt) s3
+  FROM   store_sales
+        ,store_returns
+        ,cs_ui
+        ,date_dim d1
+        ,date_dim d2
+        ,date_dim d3
+        ,store
+        ,customer
+        ,customer_demographics cd1
+        ,customer_demographics cd2
+        ,promotion
+        ,household_demographics hd1
+        ,household_demographics hd2
+        ,customer_address ad1
+        ,customer_address ad2
+        ,income_band ib1
+        ,income_band ib2
+        ,item
+  WHERE  ss_store_sk = s_store_sk AND
+         ss_sold_date_sk = d1.d_date_sk AND
+         ss_customer_sk = c_customer_sk AND
+         ss_cdemo_sk= cd1.cd_demo_sk AND
+         ss_hdemo_sk = hd1.hd_demo_sk AND
+         ss_addr_sk = ad1.ca_address_sk and
+         ss_item_sk = i_item_sk and
+         ss_item_sk = sr_item_sk and
+         ss_ticket_number = sr_ticket_number and
+         ss_item_sk = cs_ui.cs_item_sk and
+         c_current_cdemo_sk = cd2.cd_demo_sk AND
+         c_current_hdemo_sk = hd2.hd_demo_sk AND
+         c_current_addr_sk = ad2.ca_address_sk and
+         c_first_sales_date_sk = d2.d_date_sk and
+         c_first_shipto_date_sk = d3.d_date_sk and
+         ss_promo_sk = p_promo_sk and
+         hd1.hd_income_band_sk = ib1.ib_income_band_sk and
+         hd2.hd_income_band_sk = ib2.ib_income_band_sk and
+         cd1.cd_marital_status <> cd2.cd_marital_status and
+         i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and
+         i_current_price between 35 and 35 + 10 and
+         i_current_price between 35 + 1 and 35 + 15
+group by i_product_name
+       ,i_item_sk
+       ,s_store_name
+       ,s_zip
+       ,ad1.ca_street_number
+       ,ad1.ca_street_name
+       ,ad1.ca_city
+       ,ad1.ca_zip
+       ,ad2.ca_street_number
+       ,ad2.ca_street_name
+       ,ad2.ca_city
+       ,ad2.ca_zip
+       ,d1.d_year
+       ,d2.d_year
+       ,d3.d_year
+)
+select cs1.product_name
+     ,cs1.store_name
+     ,cs1.store_zip
+     ,cs1.b_street_number
+     ,cs1.b_street_name
+     ,cs1.b_city
+     ,cs1.b_zip
+     ,cs1.c_street_number
+     ,cs1.c_street_name
+     ,cs1.c_city
+     ,cs1.c_zip
+     ,cs1.syear
+     ,cs1.cnt
+     ,cs1.s1 as s11
+     ,cs1.s2 as s21
+     ,cs1.s3 as s31
+     ,cs2.s1 as s12
+     ,cs2.s2 as s22
+     ,cs2.s3 as s32
+     ,cs2.syear
+     ,cs2.cnt
+from cross_sales cs1,cross_sales cs2
+where cs1.item_sk=cs2.item_sk and
+     cs1.syear = 2000 and
+     cs2.syear = 2000 + 1 and
+     cs2.cnt <= cs1.cnt and
+     cs1.store_name = cs2.store_name and
+     cs1.store_zip = cs2.store_zip
+order by cs1.product_name
+       ,cs1.store_name
+       ,cs2.cnt
+       ,cs2.s1;
+
+-- end query 1 in stream 0 using template query64.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query65.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query65.sql
new file mode 100644
index 0000000..959bf0c
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query65.sql
@@ -0,0 +1,29 @@
+-- start query 1 in stream 0 using template query65.tpl
+select 
+	s_store_name,
+	i_item_desc,
+	sc.revenue,
+	i_current_price,
+	i_wholesale_cost,
+	i_brand
+ from store, item,
+     (select ss_store_sk, avg(revenue) as ave
+ 	from
+ 	    (select  ss_store_sk, ss_item_sk, 
+ 		     sum(ss_sales_price) as revenue
+ 		from store_sales, date_dim
+ 		where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11
+ 		group by ss_store_sk, ss_item_sk) sa
+ 	group by ss_store_sk) sb,
+     (select  ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue
+ 	from store_sales, date_dim
+ 	where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11
+ 	group by ss_store_sk, ss_item_sk) sc
+ where sb.ss_store_sk = sc.ss_store_sk and 
+       sc.revenue <= 0.1 * sb.ave and
+       s_store_sk = sc.ss_store_sk and
+       i_item_sk = sc.ss_item_sk
+ order by s_store_name, i_item_desc
+limit 100;
+
+-- end query 1 in stream 0 using template query65.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query66.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query66.sql
new file mode 100644
index 0000000..13a5d05
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query66.sql
@@ -0,0 +1,220 @@
+-- start query 1 in stream 0 using template query66.tpl
+select   
+         w_warehouse_name
+ 	,w_warehouse_sq_ft
+ 	,w_city
+ 	,w_county
+ 	,w_state
+ 	,w_country
+        ,ship_carriers
+        ,year
+ 	,sum(jan_sales) as jan_sales
+ 	,sum(feb_sales) as feb_sales
+ 	,sum(mar_sales) as mar_sales
+ 	,sum(apr_sales) as apr_sales
+ 	,sum(may_sales) as may_sales
+ 	,sum(jun_sales) as jun_sales
+ 	,sum(jul_sales) as jul_sales
+ 	,sum(aug_sales) as aug_sales
+ 	,sum(sep_sales) as sep_sales
+ 	,sum(oct_sales) as oct_sales
+ 	,sum(nov_sales) as nov_sales
+ 	,sum(dec_sales) as dec_sales
+ 	,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot
+ 	,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot
+ 	,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot
+ 	,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot
+ 	,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot
+ 	,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot
+ 	,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot
+ 	,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot
+ 	,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot
+ 	,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot
+ 	,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot
+ 	,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot
+ 	,sum(jan_net) as jan_net
+ 	,sum(feb_net) as feb_net
+ 	,sum(mar_net) as mar_net
+ 	,sum(apr_net) as apr_net
+ 	,sum(may_net) as may_net
+ 	,sum(jun_net) as jun_net
+ 	,sum(jul_net) as jul_net
+ 	,sum(aug_net) as aug_net
+ 	,sum(sep_net) as sep_net
+ 	,sum(oct_net) as oct_net
+ 	,sum(nov_net) as nov_net
+ 	,sum(dec_net) as dec_net
+ from (
+     select 
+ 	w_warehouse_name
+ 	,w_warehouse_sq_ft
+ 	,w_city
+ 	,w_county
+ 	,w_state
+ 	,w_country
+ 	,concat('DIAMOND', ',', 'AIRBORNE') as ship_carriers
+       ,d_year as year
+ 	,sum(case when d_moy = 1 
+ 		then ws_sales_price* ws_quantity else 0 end) as jan_sales
+ 	,sum(case when d_moy = 2 
+ 		then ws_sales_price* ws_quantity else 0 end) as feb_sales
+ 	,sum(case when d_moy = 3 
+ 		then ws_sales_price* ws_quantity else 0 end) as mar_sales
+ 	,sum(case when d_moy = 4 
+ 		then ws_sales_price* ws_quantity else 0 end) as apr_sales
+ 	,sum(case when d_moy = 5 
+ 		then ws_sales_price* ws_quantity else 0 end) as may_sales
+ 	,sum(case when d_moy = 6 
+ 		then ws_sales_price* ws_quantity else 0 end) as jun_sales
+ 	,sum(case when d_moy = 7 
+ 		then ws_sales_price* ws_quantity else 0 end) as jul_sales
+ 	,sum(case when d_moy = 8 
+ 		then ws_sales_price* ws_quantity else 0 end) as aug_sales
+ 	,sum(case when d_moy = 9 
+ 		then ws_sales_price* ws_quantity else 0 end) as sep_sales
+ 	,sum(case when d_moy = 10 
+ 		then ws_sales_price* ws_quantity else 0 end) as oct_sales
+ 	,sum(case when d_moy = 11
+ 		then ws_sales_price* ws_quantity else 0 end) as nov_sales
+ 	,sum(case when d_moy = 12
+ 		then ws_sales_price* ws_quantity else 0 end) as dec_sales
+ 	,sum(case when d_moy = 1 
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net
+ 	,sum(case when d_moy = 2
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net
+ 	,sum(case when d_moy = 3 
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net
+ 	,sum(case when d_moy = 4 
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net
+ 	,sum(case when d_moy = 5 
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net
+ 	,sum(case when d_moy = 6 
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net
+ 	,sum(case when d_moy = 7 
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net
+ 	,sum(case when d_moy = 8 
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net
+ 	,sum(case when d_moy = 9 
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net
+ 	,sum(case when d_moy = 10 
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net
+ 	,sum(case when d_moy = 11
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net
+ 	,sum(case when d_moy = 12
+ 		then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net
+     from
+          web_sales
+         ,warehouse
+         ,date_dim
+         ,time_dim
+ 	  ,ship_mode
+     where
+            ws_warehouse_sk =  w_warehouse_sk
+        and ws_sold_date_sk = d_date_sk
+        and ws_sold_time_sk = t_time_sk
+ 	and ws_ship_mode_sk = sm_ship_mode_sk
+        and d_year = 2002
+ 	and t_time between 49530 and 49530+28800 
+ 	and sm_carrier in ('DIAMOND','AIRBORNE')
+     group by 
+        w_warehouse_name
+ 	,w_warehouse_sq_ft
+ 	,w_city
+ 	,w_county
+ 	,w_state
+ 	,w_country
+       ,d_year
+ union all
+     select 
+ 	w_warehouse_name
+ 	,w_warehouse_sq_ft
+ 	,w_city
+ 	,w_county
+ 	,w_state
+ 	,w_country
+    ,concat('DIAMOND', ',', 'AIRBORNE') as ship_carriers
+  ,d_year as year
+ 	,sum(case when d_moy = 1 
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales
+ 	,sum(case when d_moy = 2 
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales
+ 	,sum(case when d_moy = 3 
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales
+ 	,sum(case when d_moy = 4 
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales
+ 	,sum(case when d_moy = 5 
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as may_sales
+ 	,sum(case when d_moy = 6 
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales
+ 	,sum(case when d_moy = 7 
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales
+ 	,sum(case when d_moy = 8 
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales
+ 	,sum(case when d_moy = 9 
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales
+ 	,sum(case when d_moy = 10 
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales
+ 	,sum(case when d_moy = 11
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales
+ 	,sum(case when d_moy = 12
+ 		then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales
+ 	,sum(case when d_moy = 1 
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net
+ 	,sum(case when d_moy = 2 
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net
+ 	,sum(case when d_moy = 3 
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net
+ 	,sum(case when d_moy = 4 
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net
+ 	,sum(case when d_moy = 5 
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net
+ 	,sum(case when d_moy = 6 
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net
+ 	,sum(case when d_moy = 7 
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net
+ 	,sum(case when d_moy = 8 
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net
+ 	,sum(case when d_moy = 9 
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net
+ 	,sum(case when d_moy = 10 
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net
+ 	,sum(case when d_moy = 11
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net
+ 	,sum(case when d_moy = 12
+ 		then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net
+     from
+          catalog_sales
+         ,warehouse
+         ,date_dim
+         ,time_dim
+ 	 ,ship_mode
+     where
+            cs_warehouse_sk =  w_warehouse_sk
+        and cs_sold_date_sk = d_date_sk
+        and cs_sold_time_sk = t_time_sk
+ 	and cs_ship_mode_sk = sm_ship_mode_sk
+        and d_year = 2002
+ 	and t_time between 49530 AND 49530+28800 
+ 	and sm_carrier in ('DIAMOND','AIRBORNE')
+     group by 
+        w_warehouse_name
+ 	,w_warehouse_sq_ft
+ 	,w_city
+ 	,w_county
+ 	,w_state
+ 	,w_country
+       ,d_year
+ ) x
+ group by 
+        w_warehouse_name
+ 	,w_warehouse_sq_ft
+ 	,w_city
+ 	,w_county
+ 	,w_state
+ 	,w_country
+ 	,ship_carriers
+       ,year
+ order by w_warehouse_name
+ limit 100;
+
+-- end query 1 in stream 0 using template query66.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query68.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query68.sql
new file mode 100644
index 0000000..584891a
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query68.sql
@@ -0,0 +1,42 @@
+-- start query 1 in stream 0 using template query68.tpl
+select  c_last_name
+       ,c_first_name
+       ,ca_city
+       ,bought_city
+       ,ss_ticket_number
+       ,extended_price
+       ,extended_tax
+       ,list_price
+ from (select ss_ticket_number
+             ,ss_customer_sk
+             ,ca_city bought_city
+             ,sum(ss_ext_sales_price) extended_price 
+             ,sum(ss_ext_list_price) list_price
+             ,sum(ss_ext_tax) extended_tax 
+       from store_sales
+           ,date_dim
+           ,store
+           ,household_demographics
+           ,customer_address 
+       where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+         and store_sales.ss_store_sk = store.s_store_sk  
+        and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+        and store_sales.ss_addr_sk = customer_address.ca_address_sk
+        and date_dim.d_dom between 1 and 2 
+        and (household_demographics.hd_dep_count = 4 or
+             household_demographics.hd_vehicle_count= 2)
+        and date_dim.d_year in (1998,1998+1,1998+2)
+        and store.s_city in ('Rosedale','Bethlehem')
+       group by ss_ticket_number
+               ,ss_customer_sk
+               ,ss_addr_sk,ca_city) dn
+      ,customer
+      ,customer_address current_addr
+ where ss_customer_sk = c_customer_sk
+   and customer.c_current_addr_sk = current_addr.ca_address_sk
+   and current_addr.ca_city <> bought_city
+ order by c_last_name
+         ,ss_ticket_number
+ limit 100;
+
+-- end query 1 in stream 0 using template query68.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query69.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query69.sql
new file mode 100644
index 0000000..c92f3e3
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query69.sql
@@ -0,0 +1,47 @@
+-- start query 1 in stream 0 using template query69.tpl
+select  
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  count(*) cnt1,
+  cd_purchase_estimate,
+  count(*) cnt2,
+  cd_credit_rating,
+  count(*) cnt3
+ from
+  customer c,customer_address ca,customer_demographics
+ where
+  c.c_current_addr_sk = ca.ca_address_sk and
+  ca_state in ('CO','IL','MN') and
+  cd_demo_sk = c.c_current_cdemo_sk and 
+  exists (select *
+          from store_sales,date_dim
+          where c.c_customer_sk = ss_customer_sk and
+                ss_sold_date_sk = d_date_sk and
+                d_year = 1999 and
+                d_moy between 1 and 1+2) and
+   (not exists (select *
+            from web_sales,date_dim
+            where c.c_customer_sk = ws_bill_customer_sk and
+                  ws_sold_date_sk = d_date_sk and
+                  d_year = 1999 and
+                  d_moy between 1 and 1+2) and
+    not exists (select * 
+            from catalog_sales,date_dim
+            where c.c_customer_sk = cs_ship_customer_sk and
+                  cs_sold_date_sk = d_date_sk and
+                  d_year = 1999 and
+                  d_moy between 1 and 1+2))
+ group by cd_gender,
+          cd_marital_status,
+          cd_education_status,
+          cd_purchase_estimate,
+          cd_credit_rating
+ order by cd_gender,
+          cd_marital_status,
+          cd_education_status,
+          cd_purchase_estimate,
+          cd_credit_rating
+ limit 100;
+
+-- end query 1 in stream 0 using template query69.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query7.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query7.sql
new file mode 100644
index 0000000..ae69203
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query7.sql
@@ -0,0 +1,21 @@
+-- start query 1 in stream 0 using template query7.tpl
+select  i_item_id, 
+        avg(ss_quantity) agg1,
+        avg(ss_list_price) agg2,
+        avg(ss_coupon_amt) agg3,
+        avg(ss_sales_price) agg4 
+ from store_sales, customer_demographics, date_dim, item, promotion
+ where ss_sold_date_sk = d_date_sk and
+       ss_item_sk = i_item_sk and
+       ss_cdemo_sk = cd_demo_sk and
+       ss_promo_sk = p_promo_sk and
+       cd_gender = 'F' and 
+       cd_marital_status = 'W' and
+       cd_education_status = 'Primary' and
+       (p_channel_email = 'N' or p_channel_event = 'N') and
+       d_year = 1998 
+ group by i_item_id
+ order by i_item_id
+ limit 100;
+
+-- end query 1 in stream 0 using template query7.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query71.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query71.sql
new file mode 100644
index 0000000..5ab1f6c
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query71.sql
@@ -0,0 +1,40 @@
+-- start query 1 in stream 0 using template query71.tpl
+select i_brand_id brand_id, i_brand brand,t_hour,t_minute,
+ 	sum(ext_price) ext_price
+ from item, (select ws_ext_sales_price as ext_price, 
+                        ws_sold_date_sk as sold_date_sk,
+                        ws_item_sk as sold_item_sk,
+                        ws_sold_time_sk as time_sk  
+                 from web_sales,date_dim
+                 where d_date_sk = ws_sold_date_sk
+                   and d_moy=12
+                   and d_year=2000
+                 union all
+                 select cs_ext_sales_price as ext_price,
+                        cs_sold_date_sk as sold_date_sk,
+                        cs_item_sk as sold_item_sk,
+                        cs_sold_time_sk as time_sk
+                 from catalog_sales,date_dim
+                 where d_date_sk = cs_sold_date_sk
+                   and d_moy=12
+                   and d_year=2000
+                 union all
+                 select ss_ext_sales_price as ext_price,
+                        ss_sold_date_sk as sold_date_sk,
+                        ss_item_sk as sold_item_sk,
+                        ss_sold_time_sk as time_sk
+                 from store_sales,date_dim
+                 where d_date_sk = ss_sold_date_sk
+                   and d_moy=12
+                   and d_year=2000
+                 ) tmp,time_dim
+ where
+   sold_item_sk = i_item_sk
+   and i_manager_id=1
+   and time_sk = t_time_sk
+   and (t_meal_time = 'breakfast' or t_meal_time = 'dinner')
+ group by i_brand, i_brand_id,t_hour,t_minute
+ order by ext_price desc, i_brand_id
+ ;
+
+-- end query 1 in stream 0 using template query71.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query72.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query72.sql
new file mode 100644
index 0000000..c1967b5
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query72.sql
@@ -0,0 +1,29 @@
+-- start query 1 in stream 0 using template query72.tpl
+select  i_item_desc
+      ,w_warehouse_name
+      ,d1.d_week_seq
+      ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo
+      ,sum(case when p_promo_sk is not null then 1 else 0 end) promo
+      ,count(*) total_cnt
+from catalog_sales
+join inventory on (cs_item_sk = inv_item_sk)
+join warehouse on (w_warehouse_sk=inv_warehouse_sk)
+join item on (i_item_sk = cs_item_sk)
+join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk)
+join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk)
+join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk)
+join date_dim d2 on (inv_date_sk = d2.d_date_sk)
+join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk)
+left outer join promotion on (cs_promo_sk=p_promo_sk)
+left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number)
+where d1.d_week_seq = d2.d_week_seq
+  and inv_quantity_on_hand < cs_quantity 
+  and cast(d3.d_date as timestamp) > cast(d1.d_date as timestamp) + interval 5 days
+  and hd_buy_potential = '1001-5000'
+  and d1.d_year = 2001
+  and cd_marital_status = 'M'
+group by i_item_desc,w_warehouse_name,d1.d_week_seq
+order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq
+limit 100;
+
+-- end query 1 in stream 0 using template query72.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query73.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query73.sql
new file mode 100644
index 0000000..b28c338
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query73.sql
@@ -0,0 +1,28 @@
+-- start query 1 in stream 0 using template query73.tpl
+select c_last_name
+       ,c_first_name
+       ,c_salutation
+       ,c_preferred_cust_flag 
+       ,ss_ticket_number
+       ,cnt from
+   (select ss_ticket_number
+          ,ss_customer_sk
+          ,count(*) cnt
+    from store_sales,date_dim,store,household_demographics
+    where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+    and store_sales.ss_store_sk = store.s_store_sk  
+    and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+    and date_dim.d_dom between 1 and 2 
+    and (household_demographics.hd_buy_potential = '1001-5000' or
+         household_demographics.hd_buy_potential = '5001-10000')
+    and household_demographics.hd_vehicle_count > 0
+    and case when household_demographics.hd_vehicle_count > 0 then 
+             household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1
+    and date_dim.d_year in (1998,1998+1,1998+2)
+    and store.s_county in ('Kittitas County','Adams County','Richland County','Furnas County')
+    group by ss_ticket_number,ss_customer_sk) dj,customer
+    where ss_customer_sk = c_customer_sk
+      and cnt between 1 and 5
+    order by cnt desc, c_last_name asc;
+
+-- end query 1 in stream 0 using template query73.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query74.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query74.sql
new file mode 100644
index 0000000..8802ca4
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query74.sql
@@ -0,0 +1,61 @@
+-- start query 1 in stream 0 using template query74.tpl
+with year_total as (
+ select c_customer_id customer_id
+       ,c_first_name customer_first_name
+       ,c_last_name customer_last_name
+       ,d_year as year
+       ,max(ss_net_paid) year_total
+       ,'s' sale_type
+ from customer
+     ,store_sales
+     ,date_dim
+ where c_customer_sk = ss_customer_sk
+   and ss_sold_date_sk = d_date_sk
+   and d_year in (2001,2001+1)
+ group by c_customer_id
+         ,c_first_name
+         ,c_last_name
+         ,d_year
+ union all
+ select c_customer_id customer_id
+       ,c_first_name customer_first_name
+       ,c_last_name customer_last_name
+       ,d_year as year
+       ,max(ws_net_paid) year_total
+       ,'w' sale_type
+ from customer
+     ,web_sales
+     ,date_dim
+ where c_customer_sk = ws_bill_customer_sk
+   and ws_sold_date_sk = d_date_sk
+   and d_year in (2001,2001+1)
+ group by c_customer_id
+         ,c_first_name
+         ,c_last_name
+         ,d_year
+         )
+  select 
+        t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name
+ from year_total t_s_firstyear
+     ,year_total t_s_secyear
+     ,year_total t_w_firstyear
+     ,year_total t_w_secyear
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
+         and t_s_firstyear.customer_id = t_w_secyear.customer_id
+         and t_s_firstyear.customer_id = t_w_firstyear.customer_id
+         and t_s_firstyear.sale_type = 's'
+         and t_w_firstyear.sale_type = 'w'
+         and t_s_secyear.sale_type = 's'
+         and t_w_secyear.sale_type = 'w'
+         and t_s_firstyear.year = 2001
+         and t_s_secyear.year = 2001+1
+         and t_w_firstyear.year = 2001
+         and t_w_secyear.year = 2001+1
+         and t_s_firstyear.year_total > 0
+         and t_w_firstyear.year_total > 0
+         and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end
+           > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end
+ order by 2,1,3
+limit 100;
+
+-- end query 1 in stream 0 using template query74.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query75.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query75.sql
new file mode 100644
index 0000000..c4bce31
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query75.sql
@@ -0,0 +1,70 @@
+-- start query 1 in stream 0 using template query75.tpl
+WITH all_sales AS (
+ SELECT d_year
+       ,i_brand_id
+       ,i_class_id
+       ,i_category_id
+       ,i_manufact_id
+       ,SUM(sales_cnt) AS sales_cnt
+       ,SUM(sales_amt) AS sales_amt
+ FROM (SELECT d_year
+             ,i_brand_id
+             ,i_class_id
+             ,i_category_id
+             ,i_manufact_id
+             ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt
+             ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt
+       FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk
+                          JOIN date_dim ON d_date_sk=cs_sold_date_sk
+                          LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number 
+                                                    AND cs_item_sk=cr_item_sk)
+       WHERE i_category='Sports'
+       UNION
+       SELECT d_year
+             ,i_brand_id
+             ,i_class_id
+             ,i_category_id
+             ,i_manufact_id
+             ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt
+             ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt
+       FROM store_sales JOIN item ON i_item_sk=ss_item_sk
+                        JOIN date_dim ON d_date_sk=ss_sold_date_sk
+                        LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number 
+                                                AND ss_item_sk=sr_item_sk)
+       WHERE i_category='Sports'
+       UNION
+       SELECT d_year
+             ,i_brand_id
+             ,i_class_id
+             ,i_category_id
+             ,i_manufact_id
+             ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt
+             ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt
+       FROM web_sales JOIN item ON i_item_sk=ws_item_sk
+                      JOIN date_dim ON d_date_sk=ws_sold_date_sk
+                      LEFT JOIN web_returns ON (ws_order_number=wr_order_number 
+                                            AND ws_item_sk=wr_item_sk)
+       WHERE i_category='Sports') sales_detail
+ GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id)
+ SELECT  prev_yr.d_year AS prev_year
+                          ,curr_yr.d_year AS year
+                          ,curr_yr.i_brand_id
+                          ,curr_yr.i_class_id
+                          ,curr_yr.i_category_id
+                          ,curr_yr.i_manufact_id
+                          ,prev_yr.sales_cnt AS prev_yr_cnt
+                          ,curr_yr.sales_cnt AS curr_yr_cnt
+                          ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff
+                          ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff
+ FROM all_sales curr_yr, all_sales prev_yr
+ WHERE curr_yr.i_brand_id=prev_yr.i_brand_id
+   AND curr_yr.i_class_id=prev_yr.i_class_id
+   AND curr_yr.i_category_id=prev_yr.i_category_id
+   AND curr_yr.i_manufact_id=prev_yr.i_manufact_id
+   AND curr_yr.d_year=2002
+   AND prev_yr.d_year=2002-1
+   AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9
+ ORDER BY sales_cnt_diff,sales_amt_diff
+ limit 100;
+
+-- end query 1 in stream 0 using template query75.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query76.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query76.sql
new file mode 100644
index 0000000..f1fc21b
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query76.sql
@@ -0,0 +1,24 @@
+-- start query 1 in stream 0 using template query76.tpl
+select  channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM (
+        SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price
+         FROM store_sales, item, date_dim
+         WHERE ss_addr_sk IS NULL
+           AND ss_sold_date_sk=d_date_sk
+           AND ss_item_sk=i_item_sk
+        UNION ALL
+        SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price
+         FROM web_sales, item, date_dim
+         WHERE ws_web_page_sk IS NULL
+           AND ws_sold_date_sk=d_date_sk
+           AND ws_item_sk=i_item_sk
+        UNION ALL
+        SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price
+         FROM catalog_sales, item, date_dim
+         WHERE cs_warehouse_sk IS NULL
+           AND cs_sold_date_sk=d_date_sk
+           AND cs_item_sk=i_item_sk) foo
+GROUP BY channel, col_name, d_year, d_qoy, i_category
+ORDER BY channel, col_name, d_year, d_qoy, i_category
+limit 100;
+
+-- end query 1 in stream 0 using template query76.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query78.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query78.sql
new file mode 100644
index 0000000..b33c5e6
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query78.sql
@@ -0,0 +1,58 @@
+-- start query 1 in stream 0 using template query78.tpl
+with ws as
+  (select d_year AS ws_sold_year, ws_item_sk,
+    ws_bill_customer_sk ws_customer_sk,
+    sum(ws_quantity) ws_qty,
+    sum(ws_wholesale_cost) ws_wc,
+    sum(ws_sales_price) ws_sp
+   from web_sales
+   left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk
+   join date_dim on ws_sold_date_sk = d_date_sk
+   where wr_order_number is null
+   group by d_year, ws_item_sk, ws_bill_customer_sk
+   ),
+cs as
+  (select d_year AS cs_sold_year, cs_item_sk,
+    cs_bill_customer_sk cs_customer_sk,
+    sum(cs_quantity) cs_qty,
+    sum(cs_wholesale_cost) cs_wc,
+    sum(cs_sales_price) cs_sp
+   from catalog_sales
+   left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk
+   join date_dim on cs_sold_date_sk = d_date_sk
+   where cr_order_number is null
+   group by d_year, cs_item_sk, cs_bill_customer_sk
+   ),
+ss as
+  (select d_year AS ss_sold_year, ss_item_sk,
+    ss_customer_sk,
+    sum(ss_quantity) ss_qty,
+    sum(ss_wholesale_cost) ss_wc,
+    sum(ss_sales_price) ss_sp
+   from store_sales
+   left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk
+   join date_dim on ss_sold_date_sk = d_date_sk
+   where sr_ticket_number is null
+   group by d_year, ss_item_sk, ss_customer_sk
+   )
+ select 
+ss_sold_year, ss_item_sk, ss_customer_sk,
+round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio,
+ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price,
+coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty,
+coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost,
+coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price
+from ss
+left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk)
+left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk)
+where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2000
+order by 
+  ss_sold_year, ss_item_sk, ss_customer_sk,
+  ss_qty desc, ss_wc desc, ss_sp desc,
+  other_chan_qty,
+  other_chan_wholesale_cost,
+  other_chan_sales_price,
+  ratio
+limit 100;
+
+-- end query 1 in stream 0 using template query78.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query79.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query79.sql
new file mode 100644
index 0000000..24cd606
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query79.sql
@@ -0,0 +1,23 @@
+-- start query 1 in stream 0 using template query79.tpl
+select 
+  c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit
+  from
+   (select ss_ticket_number
+          ,ss_customer_sk
+          ,store.s_city
+          ,sum(ss_coupon_amt) amt
+          ,sum(ss_net_profit) profit
+    from store_sales,date_dim,store,household_demographics
+    where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+    and store_sales.ss_store_sk = store.s_store_sk  
+    and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+    and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0)
+    and date_dim.d_dow = 1
+    and date_dim.d_year in (1998,1998+1,1998+2) 
+    and store.s_number_employees between 200 and 295
+    group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer
+    where ss_customer_sk = c_customer_sk
+ order by c_last_name,c_first_name,substr(s_city,1,30), profit
+limit 100;
+
+-- end query 1 in stream 0 using template query79.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query81.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query81.sql
new file mode 100644
index 0000000..3726c42
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query81.sql
@@ -0,0 +1,31 @@
+-- start query 1 in stream 0 using template query81.tpl
+with customer_total_return as
+ (select cr_returning_customer_sk as ctr_customer_sk
+        ,ca_state as ctr_state, 
+ 	sum(cr_return_amt_inc_tax) as ctr_total_return
+ from catalog_returns
+     ,date_dim
+     ,customer_address
+ where cr_returned_date_sk = d_date_sk 
+   and d_year =1998
+   and cr_returning_addr_sk = ca_address_sk 
+ group by cr_returning_customer_sk
+         ,ca_state )
+  select  c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name
+                   ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset
+                  ,ca_location_type,ctr_total_return
+ from customer_total_return ctr1
+     ,customer_address
+     ,customer
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+ 			  from customer_total_return ctr2 
+                  	  where ctr1.ctr_state = ctr2.ctr_state)
+       and ca_address_sk = c_current_addr_sk
+       and ca_state = 'IL'
+       and ctr1.ctr_customer_sk = c_customer_sk
+ order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name
+                   ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset
+                  ,ca_location_type,ctr_total_return
+ limit 100;
+
+-- end query 1 in stream 0 using template query81.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query82.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query82.sql
new file mode 100644
index 0000000..3acbdd9
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query82.sql
@@ -0,0 +1,17 @@
+-- start query 1 in stream 0 using template query82.tpl
+select  i_item_id
+       ,i_item_desc
+       ,i_current_price
+ from item, inventory, date_dim, store_sales
+ where i_current_price between 30 and 30+30
+ and inv_item_sk = i_item_sk
+ and d_date_sk=inv_date_sk
+ and cast(d_date as timestamp) between cast('2002-05-30' as timestamp) and (cast('2002-05-30' as timestamp) + interval 60 days)
+ and i_manufact_id in (437,129,727,663)
+ and inv_quantity_on_hand between 100 and 500
+ and ss_item_sk = i_item_sk
+ group by i_item_id,i_item_desc,i_current_price
+ order by i_item_id
+ limit 100;
+
+-- end query 1 in stream 0 using template query82.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query83.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query83.sql
new file mode 100644
index 0000000..440431f
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query83.sql
@@ -0,0 +1,67 @@
+-- start query 1 in stream 0 using template query83.tpl
+with sr_items as
+ (select i_item_id item_id,
+        sum(sr_return_quantity) sr_item_qty
+ from store_returns,
+      item,
+      date_dim
+ where sr_item_sk = i_item_sk
+ and   d_date    in 
+	(select d_date
+	from date_dim
+	where d_week_seq in 
+		(select d_week_seq
+		from date_dim
+	  where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and   sr_returned_date_sk   = d_date_sk
+ group by i_item_id),
+ cr_items as
+ (select i_item_id item_id,
+        sum(cr_return_quantity) cr_item_qty
+ from catalog_returns,
+      item,
+      date_dim
+ where cr_item_sk = i_item_sk
+ and   d_date    in 
+	(select d_date
+	from date_dim
+	where d_week_seq in 
+		(select d_week_seq
+		from date_dim
+	  where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and   cr_returned_date_sk   = d_date_sk
+ group by i_item_id),
+ wr_items as
+ (select i_item_id item_id,
+        sum(wr_return_quantity) wr_item_qty
+ from web_returns,
+      item,
+      date_dim
+ where wr_item_sk = i_item_sk
+ and   d_date    in 
+	(select d_date
+	from date_dim
+	where d_week_seq in 
+		(select d_week_seq
+		from date_dim
+		where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and   wr_returned_date_sk   = d_date_sk
+ group by i_item_id)
+  select  sr_items.item_id
+       ,sr_item_qty
+       ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev
+       ,cr_item_qty
+       ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev
+       ,wr_item_qty
+       ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev
+       ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average
+ from sr_items
+     ,cr_items
+     ,wr_items
+ where sr_items.item_id=cr_items.item_id
+   and sr_items.item_id=wr_items.item_id 
+ order by sr_items.item_id
+         ,sr_item_qty
+ limit 100;
+
+-- end query 1 in stream 0 using template query83.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query84.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query84.sql
new file mode 100644
index 0000000..2332a83
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query84.sql
@@ -0,0 +1,21 @@
+-- start query 1 in stream 0 using template query84.tpl
+select  c_customer_id as customer_id
+       , concat(coalesce(c_last_name,''), ', ', coalesce(c_first_name,'')) as customername
+ from customer
+     ,customer_address
+     ,customer_demographics
+     ,household_demographics
+     ,income_band
+     ,store_returns
+ where ca_city	        =  'Hopewell'
+   and c_current_addr_sk = ca_address_sk
+   and ib_lower_bound   >=  32287
+   and ib_upper_bound   <=  32287 + 50000
+   and ib_income_band_sk = hd_income_band_sk
+   and cd_demo_sk = c_current_cdemo_sk
+   and hd_demo_sk = c_current_hdemo_sk
+   and sr_cdemo_sk = cd_demo_sk
+ order by c_customer_id
+ limit 100;
+
+-- end query 1 in stream 0 using template query84.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query88.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query88.sql
new file mode 100644
index 0000000..8a9a277
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query88.sql
@@ -0,0 +1,94 @@
+-- start query 1 in stream 0 using template query88.tpl
+select  *
+from
+ (select count(*) h8_30_to_9
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk   
+     and ss_hdemo_sk = household_demographics.hd_demo_sk 
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 8
+     and time_dim.t_minute >= 30
+     and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) 
+     and store.s_store_name = 'ese') s1,
+ (select count(*) h9_to_9_30 
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk 
+     and time_dim.t_hour = 9 
+     and time_dim.t_minute < 30
+     and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+     and store.s_store_name = 'ese') s2,
+ (select count(*) h9_30_to_10 
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 9
+     and time_dim.t_minute >= 30
+     and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+     and store.s_store_name = 'ese') s3,
+ (select count(*) h10_to_10_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 10 
+     and time_dim.t_minute < 30
+     and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+     and store.s_store_name = 'ese') s4,
+ (select count(*) h10_30_to_11
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 10 
+     and time_dim.t_minute >= 30
+     and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+     and store.s_store_name = 'ese') s5,
+ (select count(*) h11_to_11_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk 
+     and time_dim.t_hour = 11
+     and time_dim.t_minute < 30
+     and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+     and store.s_store_name = 'ese') s6,
+ (select count(*) h11_30_to_12
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 11
+     and time_dim.t_minute >= 30
+     and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+     and store.s_store_name = 'ese') s7,
+ (select count(*) h12_to_12_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 12
+     and time_dim.t_minute < 30
+     and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2))
+     and store.s_store_name = 'ese') s8
+;
+
+-- end query 1 in stream 0 using template query88.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query89.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query89.sql
new file mode 100644
index 0000000..6253c3f
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query89.sql
@@ -0,0 +1,28 @@
+-- start query 1 in stream 0 using template query89.tpl
+select  *
+from(
+select i_category, i_class, i_brand,
+       s_store_name, s_company_name,
+       d_moy,
+       sum(ss_sales_price) sum_sales,
+       avg(sum(ss_sales_price)) over
+         (partition by i_category, i_brand, s_store_name, s_company_name)
+         avg_monthly_sales
+from item, store_sales, date_dim, store
+where ss_item_sk = i_item_sk and
+      ss_sold_date_sk = d_date_sk and
+      ss_store_sk = s_store_sk and
+      d_year in (2000) and
+        ((i_category in ('Home','Books','Electronics') and
+          i_class in ('wallpaper','parenting','musical')
+         )
+      or (i_category in ('Shoes','Jewelry','Men') and
+          i_class in ('womens','birdal','pants') 
+        ))
+group by i_category, i_class, i_brand,
+         s_store_name, s_company_name, d_moy) tmp1
+where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1
+order by sum_sales - avg_monthly_sales, s_store_name
+limit 100;
+
+-- end query 1 in stream 0 using template query89.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query90.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query90.sql
new file mode 100644
index 0000000..369f9e0
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query90.sql
@@ -0,0 +1,22 @@
+-- start query 1 in stream 0 using template query90.tpl
+select  cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio
+ from ( select count(*) amc
+       from web_sales, household_demographics , time_dim, web_page
+       where ws_sold_time_sk = time_dim.t_time_sk
+         and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+         and ws_web_page_sk = web_page.wp_web_page_sk
+         and time_dim.t_hour between 6 and 6+1
+         and household_demographics.hd_dep_count = 8
+         and web_page.wp_char_count between 5000 and 5200) amt,
+      ( select count(*) pmc
+       from web_sales, household_demographics , time_dim, web_page
+       where ws_sold_time_sk = time_dim.t_time_sk
+         and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+         and ws_web_page_sk = web_page.wp_web_page_sk
+         and time_dim.t_hour between 14 and 14+1
+         and household_demographics.hd_dep_count = 8
+         and web_page.wp_char_count between 5000 and 5200) pmt
+ order by am_pm_ratio
+ limit 100;
+
+-- end query 1 in stream 0 using template query90.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query91.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query91.sql
new file mode 100644
index 0000000..2519b8f
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query91.sql
@@ -0,0 +1,31 @@
+-- start query 1 in stream 0 using template query91.tpl
+select  
+        cc_call_center_id Call_Center,
+        cc_name Call_Center_Name,
+        cc_manager Manager,
+        sum(cr_net_loss) Returns_Loss
+from
+        call_center,
+        catalog_returns,
+        date_dim,
+        customer,
+        customer_address,
+        customer_demographics,
+        household_demographics
+where
+        cr_call_center_sk       = cc_call_center_sk
+and     cr_returned_date_sk     = d_date_sk
+and     cr_returning_customer_sk= c_customer_sk
+and     cd_demo_sk              = c_current_cdemo_sk
+and     hd_demo_sk              = c_current_hdemo_sk
+and     ca_address_sk           = c_current_addr_sk
+and     d_year                  = 1999 
+and     d_moy                   = 11
+and     ( (cd_marital_status       = 'M' and cd_education_status     = 'Unknown')
+        or(cd_marital_status       = 'W' and cd_education_status     = 'Advanced Degree'))
+and     hd_buy_potential like '0-500%'
+and     ca_gmt_offset           = -7
+group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status
+order by sum(cr_net_loss) desc;
+
+-- end query 1 in stream 0 using template query91.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query92.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query92.sql
new file mode 100644
index 0000000..aff17da
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query92.sql
@@ -0,0 +1,30 @@
+-- start query 1 in stream 0 using template query92.tpl
+select  
+   sum(ws_ext_discount_amt)  as "Excess Discount Amount" 
+from 
+    web_sales 
+   ,item 
+   ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = ws_item_sk 
+and cast(d_date as timestamp) between cast('1998-03-18' as timestamp) and 
+        (cast('1998-03-18' as timestamp) + interval 90 days)
+and d_date_sk = ws_sold_date_sk 
+and ws_ext_discount_amt  
+     > ( 
+         SELECT 
+            1.3 * avg(ws_ext_discount_amt) 
+         FROM 
+            web_sales 
+           ,date_dim
+         WHERE 
+              ws_item_sk = i_item_sk 
+          and cast(d_date as timestamp) between cast('1998-03-18' as timestamp) and 
+                             (cast('1998-03-18' as timestamp) + interval 90 days)
+          and d_date_sk = ws_sold_date_sk 
+      ) 
+order by sum(ws_ext_discount_amt)
+limit 100;
+
+-- end query 1 in stream 0 using template query92.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query94.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query94.sql
new file mode 100644
index 0000000..a6ff277
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query94.sql
@@ -0,0 +1,29 @@
+-- start query 1 in stream 0 using template query94.tpl
+select  
+   count(distinct ws_order_number) as "order count"
+  ,sum(ws_ext_ship_cost) as "total shipping cost"
+  ,sum(ws_net_profit) as "total net profit"
+from
+   web_sales ws1
+  ,date_dim
+  ,customer_address
+  ,web_site
+where
+    cast(d_date as timestamp) between cast('1999-05-01' as timestamp) and 
+           (cast('1999-05-01' as timestamp) + interval 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and exists (select *
+            from web_sales ws2
+            where ws1.ws_order_number = ws2.ws_order_number
+              and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+and not exists(select *
+               from web_returns wr1
+               where ws1.ws_order_number = wr1.wr_order_number)
+order by count(distinct ws_order_number)
+limit 100;
+
+-- end query 1 in stream 0 using template query94.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query95.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query95.sql
new file mode 100644
index 0000000..bd2e7ae
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query95.sql
@@ -0,0 +1,32 @@
+-- start query 1 in stream 0 using template query95.tpl
+with ws_wh as
+(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2
+ from web_sales ws1,web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+   and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+ select  
+   count(distinct ws_order_number) as "order count"
+  ,sum(ws_ext_ship_cost) as "total shipping cost"
+  ,sum(ws_net_profit) as "total net profit"
+from
+   web_sales ws1
+  ,date_dim
+  ,customer_address
+  ,web_site
+where
+    cast(d_date as timestamp) between cast('1999-05-01' as timestamp) and 
+           (cast('1999-05-01' as timestamp) + interval 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and ws1.ws_order_number in (select ws_order_number
+                            from ws_wh)
+and ws1.ws_order_number in (select wr_order_number
+                            from web_returns,ws_wh
+                            where wr_order_number = ws_wh.ws_order_number)
+order by count(distinct ws_order_number)
+limit 100;
+
+-- end query 1 in stream 0 using template query95.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query96.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query96.sql
new file mode 100644
index 0000000..390bc7d
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query96.sql
@@ -0,0 +1,16 @@
+-- start query 1 in stream 0 using template query96.tpl
+select  count(*) 
+from store_sales
+    ,household_demographics 
+    ,time_dim, store
+where ss_sold_time_sk = time_dim.t_time_sk   
+    and ss_hdemo_sk = household_demographics.hd_demo_sk 
+    and ss_store_sk = s_store_sk
+    and time_dim.t_hour = 8
+    and time_dim.t_minute >= 30
+    and household_demographics.hd_dep_count = 5
+    and store.s_store_name = 'ese'
+order by count(*)
+limit 100;
+
+-- end query 1 in stream 0 using template query96.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query97.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query97.sql
new file mode 100644
index 0000000..0db950c
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query97.sql
@@ -0,0 +1,25 @@
+-- start query 1 in stream 0 using template query97.tpl
+with ssci as (
+select ss_customer_sk customer_sk
+      ,ss_item_sk item_sk
+from store_sales,date_dim
+where ss_sold_date_sk = d_date_sk
+  and d_month_seq between 1212 and 1212 + 11
+group by ss_customer_sk
+        ,ss_item_sk),
+csci as(
+ select cs_bill_customer_sk customer_sk
+      ,cs_item_sk item_sk
+from catalog_sales,date_dim
+where cs_sold_date_sk = d_date_sk
+  and d_month_seq between 1212 and 1212 + 11
+group by cs_bill_customer_sk
+        ,cs_item_sk)
+ select  sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only
+      ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only
+      ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog
+from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk
+                               and ssci.item_sk = csci.item_sk)
+limit 100;
+
+-- end query 1 in stream 0 using template query97.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query98.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query98.sql
new file mode 100644
index 0000000..d0bd4cf
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query98.sql
@@ -0,0 +1,33 @@
+-- start query 1 in stream 0 using template query98.tpl
+select i_item_id
+      ,i_item_desc 
+      ,i_category 
+      ,i_class 
+      ,i_current_price
+      ,sum(ss_ext_sales_price) as itemrevenue 
+      ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over
+          (partition by i_class) as revenueratio
+from	
+	store_sales
+    	,item 
+    	,date_dim
+where 
+	ss_item_sk = i_item_sk 
+  	and i_category in ('Jewelry', 'Sports', 'Books')
+  	and ss_sold_date_sk = d_date_sk
+	and cast(d_date as timestamp) between cast('2001-01-12' as timestamp) 
+				and (cast('2001-01-12' as timestamp) + interval 30 days)
+group by 
+	i_item_id
+        ,i_item_desc 
+        ,i_category
+        ,i_class
+        ,i_current_price
+order by 
+	i_category
+        ,i_class
+        ,i_item_id
+        ,i_item_desc
+        ,revenueratio;
+
+-- end query 1 in stream 0 using template query98.tpl
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query99.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query99.sql
new file mode 100644
index 0000000..757cf27
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query99.sql
@@ -0,0 +1,35 @@
+-- start query 1 in stream 0 using template query99.tpl
+select  
+   substr(w_warehouse_name,1,20)
+  ,sm_type
+  ,cc_name
+  ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end)  as "30 days" 
+  ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and 
+                 (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end )  as "31-60 days" 
+  ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and 
+                 (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end)  as "61-90 days" 
+  ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and
+                 (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end)  as "91-120 days" 
+  ,sum(case when (cs_ship_date_sk - cs_sold_date_sk  > 120) then 1 else 0 end)  as ">120 days" 
+from
+   catalog_sales
+  ,warehouse
+  ,ship_mode
+  ,call_center
+  ,date_dim
+where
+    d_month_seq between 1212 and 1212 + 11
+and cs_ship_date_sk   = d_date_sk
+and cs_warehouse_sk   = w_warehouse_sk
+and cs_ship_mode_sk   = sm_ship_mode_sk
+and cs_call_center_sk = cc_call_center_sk
+group by
+   substr(w_warehouse_name,1,20)
+  ,sm_type
+  ,cc_name
+order by substr(w_warehouse_name,1,20)
+        ,sm_type
+        ,cc_name
+limit 100;
+
+-- end query 1 in stream 0 using template query99.tpl


[impala] 01/09: [DOCS] Format fixes in impala_shutdown.xml


commit 9d5e2a20ec4fd0e34fc930c072f71d6170c5c4de
Author: Alex Rodoni <ar...@cloudera.com>
AuthorDate: Fri Feb 8 17:43:18 2019 -0800

    [DOCS] Format fixes in impala_shutdown.xml
    
    Change-Id: I165d9c2ee9de3a5a6c23e0664a53705411e817f4
    Reviewed-on: http://gerrit.cloudera.org:8080/12424
    Reviewed-by: Alex Rodoni <ar...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 docs/topics/impala_shutdown.xml | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/docs/topics/impala_shutdown.xml b/docs/topics/impala_shutdown.xml
index cc636cd..642003c 100644
--- a/docs/topics/impala_shutdown.xml
+++ b/docs/topics/impala_shutdown.xml
@@ -139,23 +139,14 @@ under the License.
 
         </stentry>
 
-        <stentry><codeph>The value of the <codeph>--shutdown_deadline_s</codeph>
-            flag, which defaults to 1 hour.</codeph>
-
-        </stentry>
+        <stentry>The value of the <codeph>‑‑shutdown_deadline_s</codeph> flag,
+          which defaults to 1 hour. </stentry>
 
         <stentry>
-
-          <p>
-            <codeph><varname>deadline</varname></codeph> must be a non-negative number,
-            specified in seconds.
-          </p>
-
-          <p>
-            The value, 0, for <varname>deadline</varname> specifies an immediate shutdown.
-          </p>
-
-        </stentry>
+          <codeph><varname>deadline</varname></codeph> must be a non-negative
+          number, specified in seconds. <p> The value, 0, for
+              <varname>deadline</varname> specifies an immediate shutdown.
+          </p></stentry>
 
       </strow>
 


[impala] 04/09: IMPALA-8095: Detailed expression cardinality tests


commit 4ce689e58a000e78f641ecf544ca7f8c23345abb
Author: paul-rogers <pr...@cloudera.com>
AuthorDate: Sun Jan 20 11:48:30 2019 -0800

    IMPALA-8095: Detailed expression cardinality tests
    
    Cardinality is a critical input to the query planning process,
    especially join planning. Impala has many high-level end-to-end tests
    that implicitly test cardinality at the "wholesale" level: A test will
    produce a wrong result if the cardinality is badly wrong.
    
    This patch adds detailed unit tests for cardinality:
    
    * Table cardinality, NDV values and null count in metadata retrieved from
      HMS.
    * Table cardinality, NDV values and null counts in metadata presented to
      the query.
    * Expression NDV and selectivity values (which derive from table
      cardinality and column NDV), as sketched in the example below.
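    
    The new tests assert expression NDV and derived selectivity in this
    style (this call appears verbatim in the new ExprCardinalityTest):
    
        // sel(int_col = 10) = 1/NDV(int_col) = 1/10
        verifySelectExpr("alltypes", "int_col = 10", 3, 1.0/10);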
    
    The tests illustrate a number of bugs. This patch simply identifies
    the bugs, comments out the tests that fail because of them, and
    substitutes tests that pass with the current, incorrect behavior.
    Future patches will fix the bugs. Reviewers can note the difference
    between the original, incorrect behavior shown here, and the revised
    behavior in those additional patches.
    
    Since none of the existing "functional" tables provide the level of
    detail needed for these tests, this patch adds a new test table
    specifically for this task.
    
    This set of tests was a good opportunity to extend the test "fixture"
    framework created earlier. The FrontendTestBase class was refactored
    to use a new
    FrontendFixture which represents a (simulated) Impala and HMS cluster.
    The previous SessionFixture represents a single user session (with
    session options) and the QueryFixture represents a single query.
    
    As part of this refactoring, the fixture classes moved into "common"
    alongside FrontendTestBase.
    
    Testing: This patch includes only tests: no "production" code was
    changed.
    
    Change-Id: I3da58ee9b0beebeffb170b9430bd36d20dcd2401
    Reviewed-on: http://gerrit.cloudera.org:8080/12248
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/analysis/SelectListItem.java |   6 +-
 .../org/apache/impala/analysis/SlotDescriptor.java |   3 +
 .../impala/analysis/AnalyzeAuthStmtsTest.java      |  26 +-
 .../impala/analysis/ExprCardinalityTest.java       | 596 +++++++++++++++++++++
 .../org/apache/impala/analysis/ExprNdvTest.java    | 101 +++-
 .../impala/analysis/ExprRewriteRulesTest.java      |  24 +-
 .../apache/impala/common/AbstractFrontendTest.java |  58 ++
 .../impala/common/AnalysisSessionFixture.java      |  89 +++
 ...{FrontendTestBase.java => FrontendFixture.java} | 354 +++++-------
 .../org/apache/impala/common/FrontendTestBase.java | 207 +------
 .../QueryFixture.java}                             | 181 ++-----
 .../org/apache/impala/planner/CardinalityTest.java |  87 +++
 testdata/NullRows/data.csv                         |  26 +
 testdata/bin/compute-table-stats.sh                |   2 +-
 .../functional/functional_schema_template.sql      |  24 +
 .../datasets/functional/schema_constraints.csv     |   1 +
 16 files changed, 1250 insertions(+), 535 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java b/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java
index dd3e414..4849543 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java
@@ -17,14 +17,14 @@
 
 package org.apache.impala.analysis;
 
+import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
+
 import java.util.List;
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 
-import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
-
-class SelectListItem {
+public class SelectListItem {
   private Expr expr_;
   private String alias_;
 
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
index f203d70..5be2303 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
@@ -307,4 +307,7 @@ public class SlotDescriptor {
         .add("stats", stats_)
         .toString();
   }
+
+  @Override
+  public String toString() { return debugString(); }
 }
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeAuthStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeAuthStmtsTest.java
index 18c663d..be47a08 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeAuthStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeAuthStmtsTest.java
@@ -24,12 +24,17 @@ import org.apache.impala.catalog.Catalog;
 import org.apache.impala.catalog.Role;
 import org.apache.impala.catalog.User;
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.FrontendTestBase;
 import org.apache.impala.testutil.TestUtils;
 import org.apache.impala.thrift.TQueryCtx;
 import org.apache.impala.util.EventSequence;
 import org.junit.Test;
 
-public class AnalyzeAuthStmtsTest extends AnalyzerTest {
+public class AnalyzeAuthStmtsTest extends FrontendTestBase {
+
+  // TODO: Change this to a @BeforeClass method. Then, clean up these
+  // items in @AfterClass, else we've made a global change that may affect
+  // other tests in random ways.
   public AnalyzeAuthStmtsTest() {
     catalog_.getAuthPolicy().addPrincipal(
         new Role("myRole", new HashSet<>()));
@@ -37,6 +42,25 @@ public class AnalyzeAuthStmtsTest extends AnalyzerTest {
         new User("myUser", new HashSet<>()));
   }
 
+  // TODO: Switch to use a fixture with custom settings rather than the
+  // current patchwork of base and derived class methods.
+  /**
+   * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error.
+   */
+  @Override
+  public ParseNode AnalyzesOk(String stmt) {
+    return AnalyzesOk(stmt, createAnalysisCtx(Catalog.DEFAULT_DB), null);
+  }
+
+  /**
+   * Asserts if 'stmt' passes analysis, or if the expected error string is
+   * non-null and does not match the actual error.
+   */
+  @Override
+  public void AnalysisError(String stmt, String expectedErrorString) {
+    AnalysisError(stmt, createAnalysisCtx(Catalog.DEFAULT_DB), expectedErrorString);
+  }
+
   @Override
   protected AnalysisContext createAnalysisCtx(String defaultDb) {
     TQueryCtx queryCtx = TestUtils.createQueryContext(
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java
new file mode 100644
index 0000000..b30db8f
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java
@@ -0,0 +1,596 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.analysis;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.util.Set;
+
+import org.apache.curator.shaded.com.google.common.collect.Sets;
+import org.apache.impala.catalog.Catalog;
+import org.apache.impala.catalog.Column;
+import org.apache.impala.catalog.ColumnStats;
+import org.apache.impala.catalog.DatabaseNotFoundException;
+import org.apache.impala.catalog.Db;
+import org.apache.impala.catalog.Table;
+import org.apache.impala.common.AnalysisSessionFixture;
+import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.InternalException;
+import org.apache.impala.common.QueryFixture.SelectFixture;
+import org.apache.impala.planner.CardinalityTest;
+import org.junit.Test;
+
+/**
+ * Tests expression cardinality and selectivity, both of which are
+ * important inputs to scan and join cardinality estimates.
+ *
+ * In the comments below, the notation |x| means the cardinality of
+ * x. If x is a table, then it is the row count of x. If x is a column,
+ * then it is the number of distinct values (the cardinality of the
+ * domain of the column), also known as NDV.
+ *
+ * This test focuses on cardinality and the selectivity that determines
+ * derived cardinality. If |T| is the cardinality of table T, then
+ * |T'| is defined as the cardinality of table T after applying a selection s.
+ * Selectivity is defined as:
+ *
+ * sel(s) = |T'|/|T|
+ *
+ * Or
+ *
+ * |T'| = |T| * sel(s)
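+ *
+ * For example, with |T| = 7300 (the row count of functional.alltypes
+ * used below) and sel(s) = 1/10, the derived cardinality is
+ * |T'| = 7300 * 0.1 = 730.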
+ *
+ * Though not used here, it can be helpful to think of the selectivity as
+ * the probability p that some row r appears in the output after selection:
+ *
+ * sel(s) = p(r in |T'|)
+ *
+ * Tests here focus on the entire cardinality and NDV lifecycle up to an
+ * expression, ensuring that we produce proper overall estimates. See also:
+ *
+ * * {@link ExprNdvTest} which focuses on the actual NDV calculation
+ *   method,
+ * * {@link CardinalityTest} which examines cardinality output from the
+ *   planner.
+ *
+ * The tests here illustrate a number of known bugs, typically marked by
+ * their ticket number. IMPALA-7601 is a roll-up for the general case that
+ * Impala does not estimate selectivity except in the narrow (col = const)
+ * case.
+ */
+public class ExprCardinalityTest {
+  private static AnalysisSessionFixture session_ = new AnalysisSessionFixture();
+
+  private void verifyTableCol(Table table, String colName,
+      long expectedNdv, long expectedNullCount) {
+    Column col = table.getColumn(colName);
+    assertNotNull(col);
+    ColumnStats stats = col.getStats();
+    assertNotNull(stats);
+    assertEquals(expectedNdv, stats.getNumDistinctValues());
+    assertEquals(expectedNullCount, stats.getNumNulls());
+  }
+
+  /**
+   * Baseline test of metadata cardinality, NDVs and null count.
+   * Locks down the values used in later tests to catch external changes
+   * easily.
+   *
+   * Cases:
+   * - With stats
+   *   - Columns without nulls
+   *   - Columns with nulls
+   * - Without stats, estimated from file size and schema
+   *
+   * (The last bit is not yet available.)
+   */
+
+  @Test
+  public void testMetadata() throws DatabaseNotFoundException, InternalException {
+    Catalog catalog = session_.catalog();
+    Db db = catalog.getDb("functional");
+    StmtMetadataLoader mdLoader =
+        new StmtMetadataLoader(session_.frontend(), "functional", null);
+    Set<TableName> tables = Sets.newHashSet(
+        new TableName("functional", "alltypes"),
+        new TableName("functional", "nullrows"),
+        new TableName("functional", "manynulls"));
+    mdLoader.loadTables(tables);
+
+    // Table with stats, no nulls
+    Table allTypes = db.getTable("alltypes");
+    assertEquals(7300, allTypes.getTTableStats().getNum_rows());
+    verifyTableCol(allTypes, "id", 7300, 0);
+    verifyTableCol(allTypes, "bool_col", 2, 0);
+    verifyTableCol(allTypes, "int_col", 10, 0);
+    // Bug: NDV of partition columns is -1 though it is listed as
+    // 2 in the shell with: SHOW COLUMN STATS alltypes
+    //verifyTableCol(allTypes, "year", 2, 0);
+    // Bug: When tests are run in Eclipse we get the result above.
+    // But, when the same test is run using maven from the command line,
+    // we get the result shown below.
+    // Disabling both to avoid a flaky test.
+    // Same issue for the next three tests.
+    //verifyTableCol(allTypes, "year", -1, -1);
+    //verifyTableCol(allTypes, "month", 12, 0);
+    //verifyTableCol(allTypes, "month", -1, -1);
+
+    // Table with stats and nulls
+    Table nullrows = db.getTable("nullrows");
+    assertEquals(26, nullrows.getTTableStats().getNum_rows());
+    verifyTableCol(nullrows, "id", 26, 0);
+    // Bug: NDV should be 1 to include nulls
+    verifyTableCol(nullrows, "null_str", 0, 26);
+    verifyTableCol(nullrows, "group_str", 6, 0);
+    verifyTableCol(nullrows, "some_nulls", 6, 20);
+    // Oddly, boolean columns DO include nulls in NDV.
+    verifyTableCol(nullrows, "bool_nulls", 3, 15);
+
+    // Table without stats
+    Table manynulls = db.getTable("manynulls");
+    // Bug: Table cardinality should be guessed from schema & file size.
+    assertEquals(-1, manynulls.getTTableStats().getNum_rows());
+    verifyTableCol(manynulls, "id", -1, -1);
+  }
+
+  public void verifySelectCol(String table, String col,
+      long expectedNdv, long expectedNullCount) throws ImpalaException {
+    SelectFixture select = new SelectFixture(session_)
+        .table("functional." + table)
+        .exprSql(col);
+    Expr expr = select.analyzeExpr();
+    SlotRef colRef = (SlotRef) expr;
+    assertEquals(expectedNdv, expr.getNumDistinctValues());
+    assertEquals(expectedNullCount, colRef.getDesc().getStats().getNumNulls());
+    // Columns don't have selectivity, only expressions on columns
+    assertEquals(-1, expr.getSelectivity(), 0.001);
+  }
+
+  /**
+   * Test cardinality of the column references within an AST.
+   * Ensures that the metadata cardinality was propagated into the
+   * AST, along with possible adjustments.
+   *
+   * Cases:
+   * - With stats
+   *   - Normal NDV
+   *   - Small NDV
+   *   - Small NDV with nulls
+   *   - NDV with all nulls
+   *   - Constants
+   * - Without stats
+   * @throws ImpalaException
+   */
+  @Test
+  public void testColumnCardinality() throws ImpalaException {
+    // Stats, no null values
+    verifySelectCol("alltypes", "id", 7300, 0);
+    verifySelectCol("alltypes", "bool_col", 2, 0);
+    verifySelectCol("alltypes", "int_col", 10, 0);
+    // Bug: Stats not available for partition columns
+    //verifySelectExpr("alltypes", "year", 2, 0);
+    // Bug: Unit tests in Eclipse see the above; unit tests run from the
+    // command line see the below. Disabling to avoid a flaky test,
+    // here and below.
+    //verifySelectExpr("alltypes", "year", -1, -1);
+    //verifySelectExpr("alltypes", "month", 12, 0);
+    //verifySelectExpr("alltypes", "month", -1, -1);
+
+    // Stats, with null values
+    verifySelectCol("nullrows", "id", 26, 0);
+    // Bug: NDV should be 1 to include nulls
+    verifySelectCol("nullrows", "null_str", 0, 26);
+    verifySelectCol("nullrows", "group_str", 6, 0);
+    verifySelectCol("nullrows", "some_nulls", 6, 20);
+    // Oddly, boolean columns DO include nulls in NDV.
+    verifySelectCol("nullrows", "bool_nulls", 3, 15);
+
+    // No stats
+    verifySelectCol("manynulls", "id", -1, -1);
+  }
+
+  public void verifySelectExpr(String table, String exprSql,
+      long expectedNdv, double expectedSel) throws ImpalaException {
+    SelectFixture select = new SelectFixture(session_)
+        .table("functional." + table)
+        .exprSql(exprSql);
+    Expr expr = select.analyzeExpr();
+    assertEquals(expectedNdv, expr.getNumDistinctValues());
+    assertEquals(expectedSel, expr.getSelectivity(), 0.00001);
+  }
+
+  /**
+   * Constants have an NDV of 1, selectivity of -1.
+   */
+  @Test
+  public void testConstants() throws ImpalaException {
+    verifySelectExpr("alltypes", "10", 1, -1);
+    verifySelectExpr("allTypes", "'foo'", 1, -1);
+    // Note that the constant NULL has an NDV = 1, but
+    // Null-only columns have an NDV=0...
+    // See IMPALA-8058
+    verifySelectExpr("alltypes", "NULL", 1, -1);
+    verifySelectExpr("alltypes", "true", 1, -1);
+  }
+
+  // Expression selectivity
+  // - Test for each expression type
+  // - Test for variety of situations
+  //   - Valid/invalid table cardinality
+  //   - Valid/invalid NDV
+  //   - Valid/invalid null count
+
+  /**
+   * Test col = const
+   *
+   * selectivity = 1 / |col|
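+   *
+   * e.g., functional.alltypes.int_col has NDV = 10, so
+   * sel(int_col = 10) = 1.0/10, as asserted below.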
+   */
+  @Test
+  public void testEqSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "id = 10", 3, 1.0/7300);
+    verifySelectExpr("alltypes", "bool_col = true", 3, 1.0/2);
+    verifySelectExpr("alltypes", "int_col = 10", 3, 1.0/10);
+
+    verifySelectExpr("nullrows", "id = 'foo'", 3, 1.0/26);
+    // Bug: All nulls, so NDV should = 1, so Sel should be 1.0/1
+    //verifySelectExpr("nullrows", "c = 'foo'", 3, 1.0/1);
+    verifySelectExpr("nullrows", "null_str = 'foo'", 3, -1);
+    verifySelectExpr("nullrows", "group_str = 'foo'", 3, 1.0/6);
+    //verifySelectExpr("nullrows", "some_nulls = 'foo'", 3, 1.0/7);
+    verifySelectExpr("nullrows", "some_nulls = 'foo'", 3, 1.0/6);
+
+    // Bug: Sel should default to good old 0.1
+    verifySelectExpr("manynulls", "id = 10", 3, -1);
+  }
+
+  /**
+   * Test col IS NOT DISTINCT FROM x
+   *
+   * Sel should be the same as = if x is non-null, otherwise
+   * the same as IS NULL.
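+   *
+   * e.g., sel(int_col is not distinct from 10) should match
+   * sel(int_col = 10) = 1.0/10, as asserted below.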
+   */
+  @Test
+  public void testNotDistinctSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "id is not distinct from 10", 3, 1.0/7300);
+    // Bug: does not treat NULL specially
+    // Bug: NDV should be 2 since IS NOT DISTINCT won't return NULL
+    //verifySelectExpr("alltypes", "id is not distinct from null", 2, 0);
+    verifySelectExpr("alltypes", "id is not distinct from null", 3, 1.0/7300);
+    verifySelectExpr("alltypes", "bool_col is not distinct from true", 3, 1.0/2);
+    //verifySelectExpr("alltypes", "bool_col is not distinct from null", 2, 0);
+    verifySelectExpr("alltypes", "bool_col is not distinct from null", 3, 1.0/2);
+    verifySelectExpr("alltypes", "int_col is not distinct from 10", 3, 1.0/10);
+    //verifySelectExpr("alltypes", "int_col is not distinct from null", 2, 0);
+    verifySelectExpr("alltypes", "int_col is not distinct from null", 3, 1.0/10);
+
+    verifySelectExpr("nullrows", "id is not distinct from 'foo'", 3, 1.0/26);
+    //verifySelectExpr("nullrows", "id is not distinct from null", 2, 0);
+    verifySelectExpr("nullrows", "id is not distinct from null", 3, 1.0/26);
+    // Bug: All nulls, so NDV should = 1, so Sel should be 1.0/1
+    //verifySelectExpr("nullrows", "null_str is not distinct from 'foo'", 2, 1.0/1);
+    verifySelectExpr("nullrows", "null_str is not distinct from 'foo'", 3, -1);
+    verifySelectExpr("nullrows", "null_str is not distinct from null", 3, -1);
+    verifySelectExpr("nullrows", "group_str is not distinct from 'foo'", 3, 1.0/6);
+    //verifySelectExpr("nullrows", "group_str is not distinct from null", 2, 1);
+    verifySelectExpr("nullrows", "group_str is not distinct from null", 3, 1.0/6);
+    //verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 1.0/7);
+    verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 1.0/6);
+
+    // Bug: Sel should default to good old 0.1
+    verifySelectExpr("manynulls", "id is not distinct from 10", 3, -1);
+  }
+
+  /**
+   * Test col != const
+   */
+  @Test
+  public void testNeSelectivity() throws ImpalaException {
+    // Bug: No estimated selectivity for != (IMPALA-8039)
+    //verifySelectExpr("alltypes", "id != 10", 3, 1 - 1.0/7300);
+    verifySelectExpr("alltypes", "id != 10", 3, -1);
+    //verifySelectExpr("alltypes", "bool_col != true", 3, 1 - 1.0/2);
+    verifySelectExpr("alltypes", "bool_col != true", 3, -1);
+    //verifySelectExpr("alltypes", "int_col != 10", 3, 1 - 1.0/10);
+    verifySelectExpr("alltypes", "int_col != 10", 3, -1);
+
+    //verifySelectExpr("nullrows", "id != 'foo'", 3, 1 - 1.0/26);
+    verifySelectExpr("nullrows", "id != 'foo'", 3, -1);
+    // Bug: All nulls, so NDV should = 1, so Sel should be 1 - 1.0/1
+    //verifySelectExpr("nullrows", "null_str != 'foo'", 3, 1 - 1.0/1);
+    verifySelectExpr("nullrows", "null_str != 'foo'", 3, -1);
+    //verifySelectExpr("nullrows", "group_str != 'foo'", 3, 1 - 1.0/6);
+    verifySelectExpr("nullrows", "group_str != 'foo'", 3, -1);
+    //verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, 1 - 1.0/7);
+    verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, -1);
+
+    // Bug: Sel should default to 1 - good old 0.1
+    verifySelectExpr("manynulls", "id != 10", 3, -1);
+  }
+
+  /**
+   * Test col IS DISTINCT FROM x
+   *
+   * Sel should be 1 - Sel(col IS NOT DISTINCT FROM x)
+   */
+  @Test
+  public void testDistinctSelectivity() throws ImpalaException {
+    // BUG: IS DISTINCT has no selectivity
+    //verifySelectExpr("alltypes", "id is distinct from 10", 3, 1 - 1.0/7300);
+    verifySelectExpr("alltypes", "id is distinct from 10", 3, -1);
+    // Bug: does not treat NULL specially
+    // Bug: NDV should be 2 since IS DISTINCT won't return NULL
+    //verifySelectExpr("alltypes", "id is distinct from null", 2, 1);
+    verifySelectExpr("alltypes", "id is distinct from null", 3, -1);
+    //verifySelectExpr("alltypes", "bool_col is distinct from true", 3, 1 - 1.0/2);
+    verifySelectExpr("alltypes", "bool_col is distinct from true", 3, -1);
+    //verifySelectExpr("alltypes", "bool_col is distinct from null", 2, 1);
+    verifySelectExpr("alltypes", "bool_col is distinct from null", 3, -1);
+    //verifySelectExpr("alltypes", "int_col is distinct from 10", 3, 1 - 1.0/10);
+    verifySelectExpr("alltypes", "int_col is distinct from 10", 3, -1);
+    //verifySelectExpr("alltypes", "int_col is distinct from null", 2, 1);
+    verifySelectExpr("alltypes", "int_col is distinct from null", 3, -1);
+
+    //verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, 1 - 1.0/26);
+    verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, -1);
+    //verifySelectExpr("nullrows", "id is distinct from null", 2, 1);
+    verifySelectExpr("nullrows", "id is distinct from null", 3, -1);
+    // Bug: All nulls, so NDV should = 1, so Sel should be 1.0/1
+    //verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 2, 1 - 1.0/1);
+    verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 3, -1);
+    verifySelectExpr("nullrows", "null_str is distinct from null", 3, -1);
+    //verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, 1 - 1.0/6);
+    verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, -1);
+    //verifySelectExpr("nullrows", "group_str is distinct from null", 2, 0);
+    verifySelectExpr("nullrows", "group_str is distinct from null", 3, -1);
+    //verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 1 - 1.0/7);
+    verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 1.0/6);
+
+    // Bug: Sel should default to 1 - good old 0.1
+    verifySelectExpr("manynulls", "id is distinct from 10", 3, -1);
+  }
+
+  public static final double INEQUALITY_SEL = 0.33;
+
+  private void verifyInequalitySel(String table, String col, String value)
+      throws ImpalaException {
+    for (String op : new String[] { "<", "<=", ">", ">="}) {
+      // Bug: No estimated selectivity for >, >=, <, <= (IMPALA-7603)
+      //verifySelectExpr(table, col + " " + op + " " + value, 3, INEQUALITY_SEL);
+      verifySelectExpr(table, col + " " + op + " " + value, 3, -1);
+    }
+  }
+
+  @Test
+  public void testInequalitySelectivity() throws ImpalaException {
+    verifyInequalitySel("alltypes", "id", "10");
+    verifyInequalitySel("alltypes", "int_col", "10");
+
+    verifyInequalitySel("nullrows", "id", "'foo'");
+    verifyInequalitySel("nullrows", "null_str", "'foo'");
+    verifyInequalitySel("nullrows", "group_str", "'foo'");
+    verifyInequalitySel("nullrows", "some_nulls", "'foo'");
+
+    // Bug: Sel should default to 1 - good old 0.1
+    verifyInequalitySel("manynulls", "id", "10");
+  }
+
+  /**
+   * Test col IS NULL
+   * Selectivity should be null_count / |table|
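+   *
+   * e.g., functional.nullrows.some_nulls has 20 nulls in 26 rows, so
+   * sel(some_nulls is null) should be 20.0/26, as asserted below.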
+   */
+  @Test
+  public void testIsNullSelectivity() throws ImpalaException {
+    // Bug: No estimated selectivity for IS NULL (IMPALA-8050)
+    // Should be null count / |table|
+    // Bug: NDV of IS NULL is 3, should be 2 since IS NULL will
+    // never itself return NULL
+    //verifySelectExpr("alltypes", "id is null", 2, 0);
+    verifySelectExpr("alltypes", "id is null", 3, -1);
+    //verifySelectExpr("alltypes", "bool_col is null", 2, 0);
+    verifySelectExpr("alltypes", "bool_col is null", 3, -1);
+    //verifySelectExpr("alltypes", "int_col is null", 2, 0);
+    verifySelectExpr("alltypes", "int_col is null", 3, -1);
+
+    //verifySelectExpr("nullrows", "id is null", 2, 0);
+    verifySelectExpr("nullrows", "id is null", 3, -1);
+    //verifySelectExpr("nullrows", "null_str is null", 2, 1);
+    verifySelectExpr("nullrows", "null_str is null", 3, 1);
+    //verifySelectExpr("nullrows", "group_str is null", 2, 0);
+    verifySelectExpr("nullrows", "group_str is null", 3, -1);
+    //verifySelectExpr("nullrows", "some_nulls is null", 2, 20.0/26);
+    verifySelectExpr("nullrows", "some_nulls is null", 3, 20.0/26);
+    verifySelectExpr("nullrows", "bool_nulls is not null", 3, 1 - 15.0/26);
+
+    // Bug: Sel should default to good old 0.1
+    verifySelectExpr("manynulls", "id is null", 3, -1);
+  }
+
+  /**
+   * Test col IS NOT NULL
+   * Selectivity should be 1 - null_count / |table|
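+   *
+   * e.g., sel(some_nulls is not null) should be 1 - 20.0/26,
+   * as asserted below.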
+   */
+  @Test
+  public void testNotNullSelectivity() throws ImpalaException {
+    // Bug: No estimated selectivity for IS NOT NULL (IMPALA-8050)
+    // Should be 1 - null count / |table|
+    // Bug: NDV of IS NULL is 3, should be 2 since IS NOT NULL will
+    // never itself return NULL
+    //verifySelectExpr("alltypes", "id is not null", 2, 1);
+    verifySelectExpr("alltypes", "id is null", 3, -1);
+    //verifySelectExpr("alltypes", "bool_col is not null", 2, 1);
+    verifySelectExpr("alltypes", "bool_col is null", 3, -1);
+    //verifySelectExpr("alltypes", "int_col is not null", 2, 1);
+    verifySelectExpr("alltypes", "int_col is not null", 3, -1);
+
+    //verifySelectExpr("nullrows", "id is not null", 2, 1);
+    verifySelectExpr("nullrows", "id is not null", 3, -1);
+    //verifySelectExpr("nullrows", "null_str is not null", 2, 0);
+    verifySelectExpr("nullrows", "null_str is not null", 3, 0);
+    //verifySelectExpr("nullrows", "group_str is not null", 2, 1);
+    verifySelectExpr("nullrows", "group_str is not null", 3, -1);
+    //verifySelectExpr("nullrows", "some_nulls is not null", 2, 1 - 20.0/26);
+    verifySelectExpr("nullrows", "some_nulls is not null", 3, 1 - 20.0/26);
+    verifySelectExpr("nullrows", "bool_nulls is not null", 3, 1 - 15.0/26);
+
+    // Bug: Sel should default to good old 0.1
+    verifySelectExpr("manynulls", "id is not null", 3, -1);
+  }
+
+  /**
+   * Test col IN (a, b, c)
+   *
+   * The code should check only distinct values, so that
+   * |in| = NDV(in clause)
+   *
+   * Expected selectivity is |in| / |col|
+   *
+   * Where |col| = ndv(col)
+   *
+   * Estimate should be based on the "Containment" assumption: that the
+   * in-clause values are contained in the set of column values
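+ *
+ * e.g., int_col IN (1, 2, 3) with |int_col| = 10 should give
+ * sel = 3.0/10, as asserted below.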
+   */
+  @Test
+  public void testInSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "id in (1, 2, 3)", 3, 3.0/7300);
+    // Bug: Does not use NDV, just simple value count
+    //verifySelectExpr("alltypes", "id in (1, 2, 3, 2, 3, 1)", 3, 3.0/7300);
+    verifySelectExpr("alltypes", "id in (1, 2, 3, 2, 3, 1)", 3, 6.0/7300);
+    verifySelectExpr("alltypes", "bool_col in (true)", 3, 1.0/2);
+    verifySelectExpr("alltypes", "bool_col in (true, false)", 3, 2.0/2);
+    verifySelectExpr("alltypes", "int_col in (1, 2, 3)", 3, 3.0/10);
+    verifySelectExpr("alltypes",
+        "int_col in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)", 3, 1);
+
+    verifySelectExpr("nullrows", "id in ('a', 'b', 'c')", 3, 3.0/26);
+    // Bug: Why -1?
+    //verifySelectExpr("nullrows", "null_str in ('a', 'b', 'c')", 3, 1);
+    verifySelectExpr("nullrows", "null_str in ('a', 'b', 'c')", 3, -1);
+    verifySelectExpr("nullrows", "group_str in ('a', 'b', 'c')", 3, 3.0/6);
+    //verifySelectExpr("nullrows", "some_nulls in ('a', 'b', 'c')", 3, 3.0/7);
+    verifySelectExpr("nullrows", "some_nulls in ('a', 'b', 'c')", 3, 3.0/6);
+
+    // Bug: Sel should default to good old 0.1
+    verifySelectExpr("manynulls", "id in (1, 3, 3)", 3, -1);
+  }
+
+  /**
+   * Test col NOT IN (a, b, c)
+   *
+ * Should be 1 - sel(col IN (a, b, c))
+   */
+  @Test
+  public void testNotInSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "id not in (1, 2, 3)", 3, 1 - 3.0/7300);
+    // Bug: Does not use NDV, just simple value count
+    //verifySelectExpr("alltypes", "id not in (1, 2, 3, 2, 3, 1)", 3, 1 - 3.0/7300);
+    verifySelectExpr("alltypes", "id not in (1, 2, 3, 2, 3, 1)", 3, 1 - 6.0/7300);
+    verifySelectExpr("alltypes", "bool_col not in (true)", 3, 1 - 1.0/2);
+    verifySelectExpr("alltypes", "bool_col not in (true, false)", 3, 1 - 2.0/2);
+    verifySelectExpr("alltypes", "int_col not in (1, 2, 3)", 3, 1 - 3.0/10);
+    verifySelectExpr("alltypes",
+        "int_col not in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)", 3, 0);
+
+    verifySelectExpr("nullrows", "id not in ('a', 'b', 'c')", 3, 1 - 3.0/26);
+    // Bug: Why -1?
+    //verifySelectExpr("nullrows", "null_str not in ('a', 'b', 'c')", 3, 1);
+    verifySelectExpr("nullrows", "null_str not in ('a', 'b', 'c')", 3, -1);
+    verifySelectExpr("nullrows", "group_str not in ('a', 'b', 'c')", 3, 1 - 3.0/6);
+    //verifySelectExpr("nullrows", "some_nulls not in ('a', 'b', 'c')", 3, 1 - 3.0/7);
+    verifySelectExpr("nullrows", "some_nulls not in ('a', 'b', 'c')", 3, 1 - 3.0/6);
+
+    // Bug: Sel should default to 1 - good old 0.1
+    verifySelectExpr("manynulls", "id not in (1, 3, 3)", 3, -1);
+  }
+
+  @Test
+  public void testNotSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "not id in (1, 2, 3)", 3, 1 - 3.0/7300);
+    verifySelectExpr("alltypes", "not int_col in (1, 2)", 3, 1 - 2.0/10);
+    verifySelectExpr("alltypes", "not int_col = 10", 3, 1 - 1.0/10);
+
+    // Bug: Sel should default to 1 - good old 0.1
+    //verifySelectExpr("manynulls", "not id = 10", 3, 0.9);
+    verifySelectExpr("manynulls", "not id = 10", 3, -1);
+  }
+
+  @Test
+  public void testAndSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "bool_col = true", 3, 1.0/2);
+    verifySelectExpr("alltypes", "int_col = 10", 3, 1.0/10);
+    // Note: This is NOT the logic used in plan nodes!
+    verifySelectExpr("alltypes", "bool_col = true and int_col = 10", 3, 1.0/2 * 1.0/10);
+    // Bug: should be something like (1/3)^2
+    //verifySelectExpr("alltypes", "int_col >= 10 and int_col <= 20", 3, 0.11);
+    verifySelectExpr("alltypes", "int_col >= 10 and int_col <= 20", 3, -1);
+
+    // Bug: Should be a product of two estimates.
+    // But, the -1 from the inequality poisons the whole expression
+    //verifySelectExpr("alltypes", "int_col = 10 AND smallint_col > 20",
+    //      3, 1.0/10 * 0.33);
+    verifySelectExpr("alltypes", "int_col = 10 AND smallint_col > 20", 3, -1);
+  }
+
+  @Test
+  public void testOrSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "bool_col = true or int_col = 10",
+        3, 1.0/2 + 1.0/10 - 1.0/2 * 1.0/10);
+    // Chain of OR rewritten to IN
+    verifySelectExpr("alltypes", "int_col = 10 or int_col = 20", 3, 2.0/10);
+  }
+
+  /**
+   * Test col BETWEEN x and y. Rewritten to
+ * col >= x AND col <= y. Each inequality should have an estimate. Since
+ * the expression is an AND, we multiply the two estimates.
+   * So, regardless of NDV and null count, selectivity should be
+   * something like 0.33^2.
+   */
+  @Test
+  public void testBetweenSelectivity() throws ImpalaException {
+    // Bug: NO selectivity for Between because it is rewritten to
+    // use inequalities, and there are no selectivities for those
+    // See IMPALA-8042
+    //verifySelectExpr("alltypes", "id between 30 and 60", 3, 0.33 * 0.33);
+    verifySelectExpr("alltypes", "id between 30 and 60", 3, -1);
+    //verifySelectExpr("alltypes", "int_col between 30 and 60", 3, 0.33 * 0.33);
+    verifySelectExpr("alltypes", "int_col between 30 and 60", 3, -1);
+
+    // Should not matter that there are no stats
+    //verifySelectExpr("manynulls", "id between 30 and 60", 3, 0.33 * 0.33);
+    verifySelectExpr("manynulls", "id between 30 and 60", 3, -1);
+  }
+
+  /**
+   * Test col NOT BETWEEN x and y. Should be 1 - sel(col BETWEEN x and y).
+   */
+  @Test
+  public void testNotBetweenSelectivity() throws ImpalaException {
+    // Bug: NO selectivity for Not Between because it is rewritten to
+    // use inequalities, and there are no selectivities for those
+    //verifySelectExpr("alltypes", "id not between 30 and 60", 3, 1 - 0.33 * 0.33);
+    verifySelectExpr("alltypes", "id not between 30 and 60", 3, -1);
+    //verifySelectExpr("alltypes", "int_col not between 30 and 60", 3, 1 - 0.33 * 0.33);
+    verifySelectExpr("alltypes", "int_col not between 30 and 60", 3, -1);
+
+    // Should not matter that there are no stats
+    //verifySelectExpr("manynulls", "id not between 30 and 60", 3, 1 - 0.33 * 0.33);
+    verifySelectExpr("manynulls", "id not between 30 and 60", 3, -1);
+  }
+}
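
Note: the verifySelectExpr() helper that drives the tests above is defined
earlier in ExprCardinalityTest and is not visible in this hunk. As a minimal
sketch of what such a check amounts to, assuming the analyzeStmt() helper that
FrontendFixture introduces below (the real helper may differ in details, and
the method name here is illustrative):

    private void verifySelectExprSketch(String table, String pred,
        long expectedNdv, double expectedSel) throws ImpalaException {
      // Analyze a trivial SELECT and pull the WHERE conjunct back out.
      SelectStmt stmt = (SelectStmt) feFixture_.analyzeStmt(
          "select * from functional." + table + " where " + pred);
      Expr conjunct = stmt.getWhereClause();
      // Compare planner estimates against the expected values; -1 means
      // "no estimate available".
      assertEquals(expectedNdv, conjunct.getNumDistinctValues());
      assertEquals(expectedSel, conjunct.getSelectivity(), 0.0001);
    }
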
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprNdvTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprNdvTest.java
index 88e1160..8a573a6 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprNdvTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprNdvTest.java
@@ -64,6 +64,29 @@ public class ExprNdvTest extends FrontendTestBase {
     }
   }
 
+  /**
+   * Constants have an NDV.
+   */
+  @Test
+  public void testConsts() throws ImpalaException {
+    // Would expect 1, but is 2.
+    verifyNdv("case when 0 = 1 then 'yes' else 'no' end", 2);
+
+    // Constants have NDV=1. This is set in the base LiteralExpr class,
+    // so only an INT constant is tested, all others are the same.
+    verifyNdv("10", 1);
+
+    // Propagation of const NDV. All expressions save CASE use
+    // the same max logic.
+    verifyNdv("10 * 3", 1);
+
+    // Planner defines NDV as "number of distinct values
+    // including nulls", but the NDV function (and the stats
+    // from tables) define it as "number of distinct non-null
+    // values".
+    verifyNdv("null", 1);
+  }
+
   @Test
   public void TestCaseExprBasic() throws ImpalaException {
     // All constants tests
@@ -91,8 +114,35 @@ public class ExprNdvTest extends FrontendTestBase {
   }
 
   @Test
-  public void TestCaseExprMissingStats() throws ImpalaException {
+  public void testExprBasic() throws ImpalaException {
+    // Baseline
+    verifyNdv("id", 7300);
+
+    // Produces a constant, but not worth worrying about.
+    // Actual NDV = 1 (or 2 if nullable)
+    verifyNdv("id * 0", 7300);
+
+    // Should not change NDV
+    verifyNdv("CAST(id AS VARCHAR)", 7300);
+
+    // All expressions save CASE use the max logic.
+    verifyNdv("id + 2", 7300);
+    verifyNdv("id * 2", 7300);
+
+    // IMPALA-7603: Should multiply NDVs, but does Max instead
+    verifyNdv("id + int_col", 7300);
+    verifyNdv("id * int_col", 7300);
 
+    // nullValue returns a boolean, so should be NDV=2
+    // Actual is wrong because it uses a generic calc:
+    // NDV(f(x)) = NDV(x).
+    // Should be:
+    // NDV(f(x)) = max(NDV(x), NDV(type(f)))
+    verifyNdv("nullValue(id)", 7300);
+  }
+
+  @Test
+  public void TestCaseExprMissingStats() throws ImpalaException {
     // Consts still work
     verifyNdvTwoTable("case when a.id = 1 then 'yes' " +
                       "when tiny.a = 'whatever' then 'maybe' " +
@@ -109,4 +159,53 @@ public class ExprNdvTest extends FrontendTestBase {
     verifyNdvTwoTable("case when tiny.a = 'whatever' then a.id " +
                       "else 0 end", 7301);
   }
+
+  /**
+   * Test null count handling. After IMPALA-7659, Impala computes a null count
+   * when gathering stats, but the NDV does not include nulls (except for Boolean
+   * columns) if stats are computed by Impala, but does include nulls if stats are
+   * computed by Hive. This leads to rather bizarre outcomes such as the NDV of a
+   * column = 0 when the null count is greater than zero. This is clearly a bug to
+   * be fixed, but a complex one because of Hive and backward compatibility
+   * considerations. This test simply illustrates the current (unfortunate)
+   * behavior. See IMPALA-8094.
+   */
+  @Test
+  public void testNulls() throws ImpalaException {
+    // A table with nulls for which stats have been computed
+    // NDV(a) = 26
+    verifyNdvStmt("SELECT id FROM functional.nullrows", 26);
+    // NDV(f) = 6
+    verifyNdvStmt("SELECT some_nulls FROM functional.nullrows", 6);
+    // NDV(c) = 0 (all nulls), but add 1 for nulls
+    // Bug: See IMPALA-7310, IMPALA-8094
+    //verifyNdvStmt("SELECT null_str FROM functional.nullrows", 1);
+    verifyNdvStmt("SELECT null_str FROM functional.nullrows", 0);
+    // NDV(b) = 1, add 1 for nulls
+    // Bug: Same as above
+    //verifyNdvStmt("SELECT blanks FROM functional.nullrows", 2);
+    verifyNdvStmt("SELECT blank FROM functional.nullrows", 1);
+
+    // Same schema, one row
+    verifyNdvStmt("SELECT a FROM functional.nulltable", 1);
+    // Bug: Same as above
+    //verifyNdvStmt("SELECT c FROM functional.nulltable", 1);
+    verifyNdvStmt("SELECT c FROM functional.nulltable", 0);
+
+    // 11K rows, no stats
+    // Bug: Should come up with some estimate from size
+    verifyNdvStmt("SELECT id FROM functional.manynulls", -1);
+
+    // Table with 8 rows, NDV(year) = 1,
+    // null count for year is 0, so no adjustment.
+    verifyNdvStmt("SELECT year FROM functional.alltypestiny", 1);
+
+    // Test with non-nullable columns.
+    // NDV value from stats not increased by one here.
+    verifyNdvStmt("SELECT id FROM functional_kudu.alltypestiny", 8);
+    // But, is increased for a nullable column.
+    // Bug: Same as above
+    //verifyNdvStmt("SELECT year FROM functional_kudu.alltypestiny", 2);
+    verifyNdvStmt("SELECT year FROM functional_kudu.alltypestiny", 1);
+  }
 }
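
For reference, the "max logic" these comments keep pointing at is the generic
NDV propagation the analyzer applies to most expressions other than CASE: the
NDV of the parent is taken as the maximum NDV over its children, rather than
the product (IMPALA-7603 tracks the latter). Schematically, under that
assumption:

    // Illustrative only: generic NDV propagation for f(c1, ..., cn).
    long ndv = -1;
    for (Expr child : expr.getChildren()) {
      ndv = Math.max(ndv, child.getNumDistinctValues());
    }
    // Hence NDV(id + int_col) == max(7300, 10) == 7300 in the test above.
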
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
index b91c526..7536fbb 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
@@ -26,8 +26,10 @@ import java.util.List;
 
 import org.apache.impala.catalog.ScalarType;
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.AnalysisSessionFixture;
 import org.apache.impala.common.FrontendTestBase;
 import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.QueryFixture;
 import org.apache.impala.common.SqlCastException;
 import org.apache.impala.rewrite.BetweenToCompoundRule;
 import org.apache.impala.rewrite.EqualityDisjunctsToInRule;
@@ -51,13 +53,31 @@ import com.google.common.collect.Lists;
  * Tests ExprRewriteRules.
  */
 public class ExprRewriteRulesTest extends FrontendTestBase {
+  /**
+   * Wraps an ExprRewriteRule to count how many times it's been applied.
+   */
+  public static class CountingRewriteRuleWrapper implements ExprRewriteRule {
+    int rewrites_;
+    final ExprRewriteRule wrapped_;
+
+    CountingRewriteRuleWrapper(ExprRewriteRule wrapped) {
+      this.wrapped_ = wrapped;
+    }
+
+    @Override
+    public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
+      Expr ret = wrapped_.apply(expr, analyzer);
+      if (expr != ret) { rewrites_++; }
+      return ret;
+    }
+  }
 
   /**
    * Specialized form of the Select fixture which analyzes a query without
    * rewrites. Use this to invoke the rewrite engine within the test itself.
    * Note: no analysis context is created in this case.
    */
-  public static class SelectRewriteFixture extends AnalysisSessionFixture.SelectFixture {
+  public static class SelectRewriteFixture extends QueryFixture.SelectFixture {
     private Analyzer analyzer_;
 
     public SelectRewriteFixture(AnalysisSessionFixture analysisFixture) {
@@ -136,7 +156,7 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
     }
   }
 
-  public static AnalysisSessionFixture session = new AnalysisSessionFixture(frontend_);
+  public static AnalysisSessionFixture session = new AnalysisSessionFixture();
 
   @BeforeClass
   public static void setup() {
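
The CountingRewriteRuleWrapper added above makes rewrite-count assertions
possible. A hedged usage sketch, assuming the wrapped rule is driven through
ExprRewriter (the tests themselves may go through SelectRewriteFixture
instead):

    // Wrap a real rule so each successful application is counted.
    CountingRewriteRuleWrapper counting =
        new CountingRewriteRuleWrapper(BetweenToCompoundRule.INSTANCE);
    ExprRewriter rewriter = new ExprRewriter(counting);
    Expr result = rewriter.rewrite(expr, analyzer);
    // The BETWEEN predicate should have been rewritten at least once.
    assertTrue(counting.rewrites_ > 0);
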
diff --git a/fe/src/test/java/org/apache/impala/common/AbstractFrontendTest.java b/fe/src/test/java/org/apache/impala/common/AbstractFrontendTest.java
new file mode 100644
index 0000000..ff18c19
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/common/AbstractFrontendTest.java
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.common;
+
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+/**
+ * Base Unit test class that manages the Frontend fixture
+ * to initialize and shut down the front-end, and to remove any
+ * temporary tables created by the test. Derive tests from this class
+ * if the test does anything "special." Derive from
+ * {@link FrontendTestBase} for routine tests that can leverage the
+ * many default functions available.
+ *
+ * A special test is one that:
+ *
+ * * Needs specialized query options.
+ * * Needs specialized query handling, such as inspecting bits of the
+ *   AST, decorated AST or query plan.
+ *
+ * In these cases, use the fixtures directly as they provide more control
+ * than do the generic methods in FrontendTestBase.
+ */
+public abstract class AbstractFrontendTest {
+  protected static FrontendFixture feFixture_ = FrontendFixture.instance();
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    feFixture_.setUp();
+  }
+
+  @AfterClass
+  public static void cleanUp() throws Exception {
+    feFixture_.cleanUp();
+  }
+
+  @After
+  public void tearDown() {
+    feFixture_.tearDown();
+  }
+}
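
A hypothetical "special" test built on this base class might look like the
following; the class and method names are illustrative, not part of the patch:

    public class PlannerEdgeCaseTest extends AbstractFrontendTest {
      @Test
      public void testWithRewritesDisabled() throws ImpalaException {
        // Build a private session so option changes don't leak into
        // other tests sharing the front-end fixture.
        AnalysisSessionFixture session = new AnalysisSessionFixture();
        session.disableExprRewrite();
        // ... drive QueryFixture instances from the session here ...
      }
    }
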
diff --git a/fe/src/test/java/org/apache/impala/common/AnalysisSessionFixture.java b/fe/src/test/java/org/apache/impala/common/AnalysisSessionFixture.java
new file mode 100644
index 0000000..b7ff0fe
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/common/AnalysisSessionFixture.java
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.common;
+
+import org.apache.impala.analysis.ExprRewriterTest;
+import org.apache.impala.catalog.Catalog;
+import org.apache.impala.service.Frontend;
+import org.apache.impala.testutil.ImpaladTestCatalog;
+import org.apache.impala.testutil.TestUtils;
+import org.apache.impala.thrift.TQueryCtx;
+import org.apache.impala.thrift.TQueryOptions;
+
+/**
+ * Session fixture for analyzer tests. Holds state shared across test cases such
+ * as the front-end, the user, the database, and query options. Queries created
+ * from this fixture start with these defaults, but each query can change them
+ * as needed for that particular test case.
+ *
+ * This fixture is analogous to a user session, though, unlike a real session,
+ * tests can change the database, options, and user per query without changing
+ * the session settings.
+ *
+ * The session fixture is created once per test file, then query fixtures perform
+ * the work needed for each particular query. It is often helpful to wrap the
+ * query fixtures in a function if the same setup is used over and over.
+ * See {@link ExprRewriterTest} for example usage.
+ */
+public class AnalysisSessionFixture {
+
+  private final FrontendFixture feFixture_ = FrontendFixture.instance();
+  // Query options to be used for all queries. Can be overridden per-query.
+  private final TQueryOptions queryOptions_;
+  // Default database for all queries.
+  private String db_ = Catalog.DEFAULT_DB;
+  // Default user for all queries.
+  private String user_ = System.getProperty("user.name");
+
+  public AnalysisSessionFixture() {
+    queryOptions_ = new TQueryOptions();
+  }
+
+  public AnalysisSessionFixture setDB(String db) {
+    db_ = db;
+    return this;
+  }
+
+  public AnalysisSessionFixture setUser(String user) {
+    user_ = user;
+    return this;
+  }
+
+  public TQueryOptions options() { return queryOptions_; }
+  public String db() { return db_; }
+  public String user() { return user_; }
+  public Frontend frontend() { return feFixture_.frontend(); }
+  public ImpaladTestCatalog catalog() { return feFixture_.catalog(); }
+
+  /**
+   * Disable the optional expression rewrites.
+   */
+  public AnalysisSessionFixture disableExprRewrite() {
+    queryOptions_.setEnable_expr_rewrites(false);
+    return this;
+  }
+
+  public TQueryOptions cloneOptions() {
+    return new TQueryOptions(queryOptions_);
+  }
+
+  public TQueryCtx queryContext() {
+    return TestUtils.createQueryContext(db_, user_, cloneOptions());
+  }
+
+}
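
Typical configuration, using the fluent setters above (the database and user
values here are illustrative):

    AnalysisSessionFixture session = new AnalysisSessionFixture()
        .setDB("functional")
        .setUser("test_user")
        .disableExprRewrite();
    // Each call builds a fresh context from the session defaults.
    TQueryCtx queryCtx = session.queryContext();
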
diff --git a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java b/fe/src/test/java/org/apache/impala/common/FrontendFixture.java
similarity index 59%
copy from fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
copy to fe/src/test/java/org/apache/impala/common/FrontendFixture.java
index 2b6a640..1f696f0 100644
--- a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
+++ b/fe/src/test/java/org/apache/impala/common/FrontendFixture.java
@@ -26,12 +26,10 @@ import java.util.List;
 
 import org.apache.impala.analysis.AnalysisContext;
 import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
-import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.ColumnDef;
 import org.apache.impala.analysis.CreateTableStmt;
 import org.apache.impala.analysis.CreateViewStmt;
 import org.apache.impala.analysis.FunctionName;
-import org.apache.impala.analysis.InsertStmt;
 import org.apache.impala.analysis.ParseNode;
 import org.apache.impala.analysis.Parser;
 import org.apache.impala.analysis.QueryStmt;
@@ -60,86 +58,100 @@ import org.apache.impala.thrift.TFunctionBinaryType;
 import org.apache.impala.thrift.TQueryCtx;
 import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.util.EventSequence;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.BeforeClass;
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 
 /**
- * Base class for most frontend tests. Contains common functions for unit testing
- * various components, e.g., ParsesOk(), ParserError(), AnalyzesOk(), AnalysisError(),
- * as well as helper functions for creating test-local tables/views and UDF/UDAs.
+ * Test fixture for the front-end as a whole. Logically equivalent to a running
+ * Impala and HMS cluster. Manages the test metadata catalog.
+ * Use {@link AnalysisSessionFixture} to represent a user session (with a user name,
+ * session options, and so on), and a {@link QueryFixture} to represent a
+ * single query.
+ *
+ * While this fixture provides methods to parse and analyze a query, these
+ * actions are done with default options and handle the general case. Use
+ * the above fixtures for greater control, and to get at multiple bits of a
+ * query.
+ *
+ * {@link AbstractFrontendTest} manages a front-end fixture including setup
+ * and teardown. Use it as the base class for new tests that wish to use the
+ * test fixtures. {@link FrontendTestBase} extends AbstractFrontendTest and
+ * wraps the fixture in a set of functions which act as shims for legacy tests.
  */
-// TODO: Revise to leverage AnalysisFixure
-public class FrontendTestBase {
-  protected static ImpaladTestCatalog catalog_ = new ImpaladTestCatalog();
-  protected static Frontend frontend_ = new Frontend(
+
+public class FrontendFixture {
+  // Single instance used for all tests. Logically equivalent to a
+  // single Impala cluster used by many clients.
+  protected static final FrontendFixture instance_ = new FrontendFixture();
+
+  // The test catalog that can hold test-only tables.
+  protected final ImpaladTestCatalog catalog_ = new ImpaladTestCatalog();
+
+  // The actual Impala frontend that backs this fixture.
+  protected final Frontend frontend_ = new Frontend(
       AuthorizationConfig.createAuthDisabledConfig(), catalog_);
 
-  // Test-local list of test databases and tables. These are cleaned up in @After.
-  protected final List<Db> testDbs_ = Lists.newArrayList();
-  protected final List<Table> testTables_ = Lists.newArrayList();
-  protected final String[][] hintStyles_ = new String[][] {
-      new String[] { "/* +", "*/" }, // traditional commented hint
-      new String[] { "-- +", "\n" }, // eol commented hint
-      new String[] { "[", "]" } // legacy style
-  };
-
-  @BeforeClass
-  public static void setUp() throws Exception {
-    RuntimeEnv.INSTANCE.setTestEnv(true);
-  }
+  // Test-local list of test databases and tables.
+  protected final List<Db> testDbs_ = new ArrayList<>();
+  protected final List<Table> testTables_ = new ArrayList<>();
 
-  @AfterClass
-  public static void cleanUp() throws Exception {
-    RuntimeEnv.INSTANCE.reset();
-    catalog_.close();
+  protected final AnalysisSessionFixture defaultSession_;
+
+  public static FrontendFixture instance() {
+    return instance_;
   }
 
-  // Adds a Udf: default.name(args) to the catalog.
-  // TODO: we could consider having this be the sql to run instead but that requires
-  // connecting to the BE.
-  protected Function addTestFunction(String name,
-      ArrayList<ScalarType> args, boolean varArgs) {
-    return addTestFunction("default", name, args, varArgs);
+  /**
+   * Private constructor. Use {@link #instance()} to get access to
+   * the front-end fixture.
+   */
+  private FrontendFixture() {
+    defaultSession_ = new AnalysisSessionFixture();
   }
 
-  protected Function addTestFunction(String name,
-      ScalarType arg, boolean varArgs) {
-    return addTestFunction("default", name, Lists.newArrayList(arg), varArgs);
+  /**
+   * Call this from the test's @BeforeClass method.
+   */
+  public void setUp() throws Exception {
+    RuntimeEnv.INSTANCE.setTestEnv(true);
   }
 
-  protected Function addTestFunction(String db, String fnName,
-      ArrayList<ScalarType> args, boolean varArgs) {
-    ArrayList<Type> argTypes = Lists.newArrayList();
-    argTypes.addAll(args);
-    Function fn = ScalarFunction.createForTesting(
-        db, fnName, argTypes, Type.INT, "/Foo", "Foo.class", null,
-        null, TFunctionBinaryType.NATIVE);
-    fn.setHasVarArgs(varArgs);
-    catalog_.addFunction(fn);
-    return fn;
+  /**
+   * Call this from the test's @AfterClass method.
+   */
+  public void cleanUp() throws Exception {
+    RuntimeEnv.INSTANCE.reset();
+    catalog_.close();
   }
 
-  protected void addTestUda(String name, Type retType, Type... argTypes) {
-    FunctionName fnName = new FunctionName("default", name);
-    catalog_.addFunction(
-        AggregateFunction.createForTesting(
-            fnName, Lists.newArrayList(argTypes), retType, retType,
-            null, "init_fn_symbol", "update_fn_symbol", null, null,
-            null, null, null, TFunctionBinaryType.NATIVE));
+  /**
+   * Call this from the test's @After method.
+   */
+  public void tearDown() {
+    clearTestTables();
+    clearTestDbs();
   }
 
+  public Frontend frontend() { return frontend_; }
+  public ImpaladTestCatalog catalog() { return catalog_; }
+
+  /**
+   * Returns the default session with default options. Create your own
+   * instance if your test needs to change any of the options. Any number
+   * of sessions can be active at once.
+   *
+   * @return the default session with default options
+   */
+  public AnalysisSessionFixture session() { return defaultSession_; }
+
   /**
    * Add a new dummy database with the given name to the catalog.
    * Returns the new dummy database.
    * The database is registered in testDbs_ and removed in the @After method.
    */
-  protected Db addTestDb(String dbName, String comment) {
+  public Db addTestDb(String dbName, String comment) {
     Db db = catalog_.getDb(dbName);
     Preconditions.checkState(db == null, "Test db must not already exist.");
     db = new Db(dbName, new org.apache.hadoop.hive.metastore.api.Database(
@@ -162,8 +174,8 @@ public class FrontendTestBase {
    * Returns the new dummy table.
    * The test tables are registered in testTables_ and removed in the @After method.
    */
-  protected Table addTestTable(String createTableSql) {
-    CreateTableStmt createTableStmt = (CreateTableStmt) AnalyzesOk(createTableSql);
+  public Table addTestTable(String createTableSql) {
+    CreateTableStmt createTableStmt = (CreateTableStmt) analyzeStmt(createTableSql);
     Db db = catalog_.getDb(createTableStmt.getDb());
     Preconditions.checkNotNull(db, "Test tables must be created in an existing db.");
     org.apache.hadoop.hive.metastore.api.Table msTbl =
@@ -205,12 +217,18 @@ public class FrontendTestBase {
     return dummyTable;
   }
 
+  protected void clearTestTables() {
+    for (Table testTable: testTables_) {
+      testTable.getDb().removeTable(testTable.getName());
+    }
+  }
+
   /**
    * Adds a test-local view to the catalog based on the given CREATE VIEW sql.
    * The test views are registered in testTables_ and removed in the @After method.
    * Returns the new view.
    */
-  protected Table addTestView(String createViewSql) {
+  public Table addTestView(String createViewSql) {
     return addTestView(catalog_, createViewSql);
   }
 
@@ -219,94 +237,58 @@ public class FrontendTestBase {
    * The test views are registered in testTables_ and removed in the @After method.
    * Returns the new view.
    */
-  protected Table addTestView(Catalog catalog, String createViewSql) {
-    CreateViewStmt createViewStmt = (CreateViewStmt) AnalyzesOk(createViewSql);
+  public Table addTestView(Catalog catalog, String createViewSql) {
+    CreateViewStmt createViewStmt = (CreateViewStmt) analyzeStmt(createViewSql);
     Db db = catalog.getDb(createViewStmt.getDb());
     Preconditions.checkNotNull(db, "Test views must be created in an existing db.");
     // Do not analyze the stmt to avoid applying rewrites that would alter the view
     // definition. We want to model real views as closely as possible.
-    QueryStmt viewStmt = (QueryStmt) ParsesOk(createViewStmt.getInlineViewDef());
+    QueryStmt viewStmt = (QueryStmt) parseStmt(createViewStmt.getInlineViewDef());
     View dummyView = View.createTestView(db, createViewStmt.getTbl(), viewStmt);
     db.addTable(dummyView);
     testTables_.add(dummyView);
     return dummyView;
   }
 
-  protected Table addAllScalarTypesTestTable() {
-    addTestDb("allscalartypesdb", "");
-    return addTestTable("create table allscalartypes (" +
-      "bool_col boolean, tinyint_col tinyint, smallint_col smallint, int_col int, " +
-      "bigint_col bigint, float_col float, double_col double, dec1 decimal(9,0), " +
-      "d2 decimal(10, 0), d3 decimal(20, 10), d4 decimal(38, 38), d5 decimal(10, 5), " +
-      "timestamp_col timestamp, string_col string, varchar_col varchar(50), " +
-      "char_col char (30))");
-  }
-
-  protected void clearTestTables() {
-    for (Table testTable: testTables_) {
-      testTable.getDb().removeTable(testTable.getName());
-    }
-  }
-
-  /**
-   * Inject the hint into the pattern using hint location.
-   *
-   * Example:
-   *   pattern: insert %s into t %s select * from t
-   *   hint: <token_hint_begin> hint_with_args(a) <token_hint_end>
-   *   loc: Start(=oracle style) | End(=traditional style)
-   */
-  protected String InjectInsertHint(String pattern, String hint,
-      InsertStmt.HintLocation loc) {
-    final String oracleHint = (loc == InsertStmt.HintLocation.Start) ? hint : "";
-    final String defaultHint  = (loc == InsertStmt.HintLocation.End) ? hint : "";
-    return String.format(pattern, oracleHint, defaultHint);
-  }
-
-  @After
-  public void tearDown() {
-    clearTestTables();
-    clearTestDbs();
-  }
-
-  /**
-   * Parse 'stmt' and return the root StatementBase.
-   */
-  public StatementBase ParsesOk(String stmt) {
-    try {
-      StatementBase node = Parser.parse(stmt);
-      assertNotNull(node);
-      return node;
-    } catch (AnalysisException e) {
-      fail("\nParser error:\n" + e.getMessage());
-      throw new IllegalStateException(); // Keep compiler happy
-    }
+  // Adds a Udf: default.name(args) to the catalog.
+  // TODO: we could consider having this be the sql to run instead but that requires
+  // connecting to the BE.
+  public Function addTestFunction(String name,
+      ArrayList<ScalarType> args, boolean varArgs) {
+    return addTestFunction("default", name, args, varArgs);
   }
 
-  /**
-   * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error.
-   */
-  public ParseNode AnalyzesOk(String stmt) {
-    return AnalyzesOk(stmt, createAnalysisCtx(), null);
+  public Function addTestFunction(String name,
+      ScalarType arg, boolean varArgs) {
+    return addTestFunction("default", name, Lists.newArrayList(arg), varArgs);
   }
 
-  public ParseNode AnalyzesOk(String stmt, AnalysisContext analysisCtx) {
-    return AnalyzesOk(stmt, analysisCtx, null);
+  public Function addTestFunction(String db, String fnName,
+      ArrayList<ScalarType> args, boolean varArgs) {
+    List<Type> argTypes = new ArrayList<>();
+    argTypes.addAll(args);
+    Function fn = ScalarFunction.createForTesting(
+        db, fnName, argTypes, Type.INT, "/Foo", "Foo.class", null,
+        null, TFunctionBinaryType.NATIVE);
+    fn.setHasVarArgs(varArgs);
+    catalog_.addFunction(fn);
+    return fn;
   }
 
-  /**
-   * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error.
-   * If 'expectedWarning' is not null, asserts that a warning is produced.
-   */
-  public ParseNode AnalyzesOk(String stmt, String expectedWarning) {
-    return AnalyzesOk(stmt, createAnalysisCtx(), expectedWarning);
+  public void addTestUda(String name, Type retType, Type... argTypes) {
+    FunctionName fnName = new FunctionName("default", name);
+    catalog_.addFunction(
+        AggregateFunction.createForTesting(
+            fnName, Lists.newArrayList(argTypes), retType, retType,
+            null, "init_fn_symbol", "update_fn_symbol", null, null,
+            null, null, null, TFunctionBinaryType.NATIVE));
   }
 
-  protected AnalysisContext createAnalysisCtx() {
+  public AnalysisContext createAnalysisCtx() {
     return createAnalysisCtx(Catalog.DEFAULT_DB);
   }
 
-  protected AnalysisContext createAnalysisCtx(String defaultDb) {
+  public AnalysisContext createAnalysisCtx(String defaultDb) {
     TQueryCtx queryCtx = TestUtils.createQueryContext(
         defaultDb, System.getProperty("user.name"));
     EventSequence timeline = new EventSequence("Frontend Test Timeline");
@@ -315,7 +297,7 @@ public class FrontendTestBase {
     return analysisCtx;
   }
 
-  protected AnalysisContext createAnalysisCtx(TQueryOptions queryOptions) {
+  public AnalysisContext createAnalysisCtx(TQueryOptions queryOptions) {
     TQueryCtx queryCtx = TestUtils.createQueryContext();
     queryCtx.client_request.query_options = queryOptions;
     EventSequence timeline = new EventSequence("Frontend Test Timeline");
@@ -324,11 +306,11 @@ public class FrontendTestBase {
     return analysisCtx;
   }
 
-  protected AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig) {
+  public AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig) {
     return createAnalysisCtx(authzConfig, System.getProperty("user.name"));
   }
 
-  protected AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig,
+  public AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig,
       String user) {
     TQueryCtx queryCtx = TestUtils.createQueryContext(Catalog.DEFAULT_DB, user);
     EventSequence timeline = new EventSequence("Frontend Test Timeline");
@@ -336,17 +318,36 @@ public class FrontendTestBase {
     return analysisCtx;
   }
 
-  protected AnalysisContext createAnalysisCtxUsingHiveColLabels() {
-    AnalysisContext analysisCtx = createAnalysisCtx();
-    analysisCtx.setUseHiveColLabels(true);
-    return analysisCtx;
+  /**
+   * Parse 'stmt' and return the root StatementBase.
+   */
+  public StatementBase parseStmt(String stmt) {
+    try {
+      StatementBase node = Parser.parse(stmt);
+      assertNotNull(node);
+      return node;
+    } catch (AnalysisException e) {
+      fail("Parser error:\n" + e.getMessage());
+      throw new IllegalStateException(); // Keep compiler happy
+    }
+  }
+
+  public AnalysisResult parseAndAnalyze(String stmt, AnalysisContext ctx)
+      throws ImpalaException {
+    StatementBase parsedStmt = Parser.parse(stmt, ctx.getQueryOptions());
+    StmtMetadataLoader mdLoader =
+        new StmtMetadataLoader(frontend_, ctx.getQueryCtx().session.database, null);
+    StmtTableCache stmtTableCache = mdLoader.loadTables(parsedStmt);
+    return ctx.analyzeAndAuthorize(parsedStmt, stmtTableCache,
+        frontend_.getAuthzChecker());
   }
 
   /**
    * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error.
    * If 'expectedWarning' is not null, asserts that a warning is produced.
    */
-  public ParseNode AnalyzesOk(String stmt, AnalysisContext ctx, String expectedWarning) {
+  public ParseNode analyzeStmt(String stmt, AnalysisContext ctx,
+      String expectedWarning) {
     try {
       AnalysisResult analysisResult = parseAndAnalyze(stmt, ctx);
       if (expectedWarning != null) {
@@ -369,88 +370,15 @@ public class FrontendTestBase {
     } catch (Exception e) {
       e.printStackTrace();
       fail("Error during analysis:\n" + e.toString() + "\nsql:\n" + stmt);
+      throw new IllegalStateException(); // Keep compiler happy
     }
-    return null;
-  }
-
-  /**
-   * Analyzes the given statement without performing rewrites or authorization.
-   */
-  public StatementBase AnalyzesOkNoRewrite(StatementBase stmt) throws ImpalaException {
-    AnalysisContext ctx = createAnalysisCtx();
-    StmtMetadataLoader mdLoader =
-        new StmtMetadataLoader(frontend_, ctx.getQueryCtx().session.database, null);
-    StmtTableCache loadedTables = mdLoader.loadTables(stmt);
-    Analyzer analyzer = ctx.createAnalyzer(loadedTables);
-    stmt.analyze(analyzer);
-    return stmt;
-  }
-
-  /**
-   * Asserts if stmt passes analysis.
-   */
-  public void AnalysisError(String stmt) {
-    AnalysisError(stmt, null);
-  }
-
-  /**
-   * Asserts if stmt passes analysis or the error string doesn't match and it
-   * is non-null.
-   */
-  public void AnalysisError(String stmt, String expectedErrorString) {
-    AnalysisError(stmt, createAnalysisCtx(), expectedErrorString);
-  }
-
-  /**
-   * Asserts if stmt passes analysis or the error string doesn't match and it
-   * is non-null.
-   */
-  public void AnalysisError(String stmt, AnalysisContext ctx, String expectedErrorString) {
-    Preconditions.checkNotNull(expectedErrorString, "No expected error message given.");
-    try {
-      AnalysisResult analysisResult = parseAndAnalyze(stmt, ctx);
-      Preconditions.checkNotNull(analysisResult.getStmt());
-    } catch (Exception e) {
-      String errorString = e.getMessage();
-      Preconditions.checkNotNull(errorString, "Stack trace lost during exception.");
-      String msg = "got error:\n" + errorString + "\nexpected:\n" + expectedErrorString;
-      // TODO: This logic can be removed.
-      // Different versions of Hive have slightly different error messages;
-      // we normalize here as follows:
-      // 'No FileSystem for Scheme "x"' -> 'No FileSystem for scheme: x'
-      if (errorString.contains("No FileSystem for scheme ")) {
-        errorString = errorString.replace("\"", "");
-        errorString = errorString.replace("No FileSystem for scheme ",
-            "No FileSystem for scheme: ");
-      }
-      Assert.assertTrue(msg, errorString.startsWith(expectedErrorString));
-      return;
-    }
-    fail("Stmt didn't result in analysis error: " + stmt);
-  }
-
-  protected AnalysisResult parseAndAnalyze(String stmt, AnalysisContext ctx)
-      throws ImpalaException {
-    return parseAndAnalyze(stmt, ctx, frontend_);
-  }
-
-  protected AnalysisResult parseAndAnalyze(String stmt, AnalysisContext ctx, Frontend fe)
-      throws ImpalaException {
-    StatementBase parsedStmt = Parser.parse(stmt, ctx.getQueryOptions());
-    StmtMetadataLoader mdLoader =
-        new StmtMetadataLoader(fe, ctx.getQueryCtx().session.database, null);
-    StmtTableCache stmtTableCache = mdLoader.loadTables(parsedStmt);
-    return ctx.analyzeAndAuthorize(parsedStmt, stmtTableCache, fe.getAuthzChecker());
   }
 
   /**
-   * Creates an authorization config for creating an AnalysisContext with
-   * authorization enabled.
+   * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error.
+   * Uses default options; use {@link QueryFixture} for greater control.
    */
-  protected AuthorizationConfig createAuthorizationConfig() {
-    AuthorizationConfig authzConfig = AuthorizationConfig.createHadoopGroupAuthConfig(
-        "server1", null, System.getenv("IMPALA_HOME") +
-            "/fe/src/test/resources/sentry-site.xml");
-    return authzConfig;
+  public ParseNode analyzeStmt(String stmt) {
+    return analyzeStmt(stmt, createAnalysisCtx(), null);
   }
 }
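
Taken together, a test body can now reach the shared front-end through the
singleton, e.g. (illustrative):

    FrontendFixture fe = FrontendFixture.instance();
    StatementBase parsed = fe.parseStmt("select 1");
    ParseNode analyzed = fe.analyzeStmt("select id from functional.alltypes");
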
diff --git a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
index 2b6a640..837fd8c 100644
--- a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
+++ b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
@@ -17,55 +17,31 @@
 
 package org.apache.impala.common;
 
-import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.fail;
 
 import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
 
 import org.apache.impala.analysis.AnalysisContext;
 import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
 import org.apache.impala.analysis.Analyzer;
-import org.apache.impala.analysis.ColumnDef;
-import org.apache.impala.analysis.CreateTableStmt;
-import org.apache.impala.analysis.CreateViewStmt;
-import org.apache.impala.analysis.FunctionName;
 import org.apache.impala.analysis.InsertStmt;
 import org.apache.impala.analysis.ParseNode;
 import org.apache.impala.analysis.Parser;
-import org.apache.impala.analysis.QueryStmt;
 import org.apache.impala.analysis.StatementBase;
 import org.apache.impala.analysis.StmtMetadataLoader;
 import org.apache.impala.analysis.StmtMetadataLoader.StmtTableCache;
 import org.apache.impala.authorization.AuthorizationConfig;
-import org.apache.impala.catalog.AggregateFunction;
 import org.apache.impala.catalog.Catalog;
-import org.apache.impala.catalog.CatalogException;
-import org.apache.impala.catalog.Column;
 import org.apache.impala.catalog.Db;
 import org.apache.impala.catalog.Function;
-import org.apache.impala.catalog.HdfsTable;
-import org.apache.impala.catalog.KuduTable;
-import org.apache.impala.catalog.ScalarFunction;
 import org.apache.impala.catalog.ScalarType;
 import org.apache.impala.catalog.Table;
 import org.apache.impala.catalog.Type;
-import org.apache.impala.catalog.View;
-import org.apache.impala.service.CatalogOpExecutor;
 import org.apache.impala.service.Frontend;
 import org.apache.impala.testutil.ImpaladTestCatalog;
-import org.apache.impala.testutil.TestUtils;
-import org.apache.impala.thrift.TFunctionBinaryType;
-import org.apache.impala.thrift.TQueryCtx;
 import org.apache.impala.thrift.TQueryOptions;
-import org.apache.impala.util.EventSequence;
-import org.junit.After;
-import org.junit.AfterClass;
 import org.junit.Assert;
-import org.junit.BeforeClass;
 
-import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 
@@ -73,33 +49,23 @@ import com.google.common.collect.Lists;
  * Base class for most frontend tests. Contains common functions for unit testing
  * various components, e.g., ParsesOk(), ParserError(), AnalyzesOk(), AnalysisError(),
  * as well as helper functions for creating test-local tables/views and UDF/UDAs.
+ *
+ * Extend "typical" tests from this class. For deeper, or more specialized tests,
+ * extend from {@link AbstractFrontendTest} and use the various fixtures directly.
+ * This class is also used for "legacy" tests that used the many functions here
+ * rather than the newer fixtures.
  */
-// TODO: Revise to leverage AnalysisFixure
-public class FrontendTestBase {
-  protected static ImpaladTestCatalog catalog_ = new ImpaladTestCatalog();
-  protected static Frontend frontend_ = new Frontend(
-      AuthorizationConfig.createAuthDisabledConfig(), catalog_);
-
-  // Test-local list of test databases and tables. These are cleaned up in @After.
-  protected final List<Db> testDbs_ = Lists.newArrayList();
-  protected final List<Table> testTables_ = Lists.newArrayList();
+public class FrontendTestBase extends AbstractFrontendTest {
+  // Temporary shim until tests are updated to use the
+  // frontend fixture.
+  protected static Frontend frontend_ = feFixture_.frontend();
+  protected static ImpaladTestCatalog catalog_ = feFixture_.catalog();
   protected final String[][] hintStyles_ = new String[][] {
       new String[] { "/* +", "*/" }, // traditional commented hint
       new String[] { "-- +", "\n" }, // eol commented hint
       new String[] { "[", "]" } // legacy style
   };
 
-  @BeforeClass
-  public static void setUp() throws Exception {
-    RuntimeEnv.INSTANCE.setTestEnv(true);
-  }
-
-  @AfterClass
-  public static void cleanUp() throws Exception {
-    RuntimeEnv.INSTANCE.reset();
-    catalog_.close();
-  }
-
   // Adds a Udf: default.name(args) to the catalog.
   // TODO: we could consider having this be the sql to run instead but that requires
   // connecting to the BE.
@@ -115,23 +81,11 @@ public class FrontendTestBase {
 
   protected Function addTestFunction(String db, String fnName,
       ArrayList<ScalarType> args, boolean varArgs) {
-    ArrayList<Type> argTypes = Lists.newArrayList();
-    argTypes.addAll(args);
-    Function fn = ScalarFunction.createForTesting(
-        db, fnName, argTypes, Type.INT, "/Foo", "Foo.class", null,
-        null, TFunctionBinaryType.NATIVE);
-    fn.setHasVarArgs(varArgs);
-    catalog_.addFunction(fn);
-    return fn;
+    return feFixture_.addTestFunction(db, fnName, args, varArgs);
   }
 
   protected void addTestUda(String name, Type retType, Type... argTypes) {
-    FunctionName fnName = new FunctionName("default", name);
-    catalog_.addFunction(
-        AggregateFunction.createForTesting(
-            fnName, Lists.newArrayList(argTypes), retType, retType,
-            null, "init_fn_symbol", "update_fn_symbol", null, null,
-            null, null, null, TFunctionBinaryType.NATIVE));
+    feFixture_.addTestUda(name, retType, argTypes);
   }
 
   /**
@@ -140,19 +94,7 @@ public class FrontendTestBase {
    * The database is registered in testDbs_ and removed in the @After method.
    */
   protected Db addTestDb(String dbName, String comment) {
-    Db db = catalog_.getDb(dbName);
-    Preconditions.checkState(db == null, "Test db must not already exist.");
-    db = new Db(dbName, new org.apache.hadoop.hive.metastore.api.Database(
-        dbName, comment, "", Collections.<String, String>emptyMap()));
-    catalog_.addDb(db);
-    testDbs_.add(db);
-    return db;
-  }
-
-  protected void clearTestDbs() {
-    for (Db testDb: testDbs_) {
-      catalog_.removeDb(testDb.getName());
-    }
+    return feFixture_.addTestDb(dbName, comment);
   }
 
   /**
@@ -163,46 +105,7 @@ public class FrontendTestBase {
    * The test tables are registered in testTables_ and removed in the @After method.
    */
   protected Table addTestTable(String createTableSql) {
-    CreateTableStmt createTableStmt = (CreateTableStmt) AnalyzesOk(createTableSql);
-    Db db = catalog_.getDb(createTableStmt.getDb());
-    Preconditions.checkNotNull(db, "Test tables must be created in an existing db.");
-    org.apache.hadoop.hive.metastore.api.Table msTbl =
-        CatalogOpExecutor.createMetaStoreTable(createTableStmt.toThrift());
-    Table dummyTable = Table.fromMetastoreTable(db, msTbl);
-    if (dummyTable instanceof HdfsTable) {
-      List<ColumnDef> columnDefs = Lists.newArrayList(
-          createTableStmt.getPartitionColumnDefs());
-      dummyTable.setNumClusteringCols(columnDefs.size());
-      columnDefs.addAll(createTableStmt.getColumnDefs());
-      for (int i = 0; i < columnDefs.size(); ++i) {
-        ColumnDef colDef = columnDefs.get(i);
-        dummyTable.addColumn(
-            new Column(colDef.getColName(), colDef.getType(), colDef.getComment(), i));
-      }
-      try {
-        HdfsTable hdfsTable = (HdfsTable) dummyTable;
-        hdfsTable.setPrototypePartition(msTbl.getSd());
-      } catch (CatalogException e) {
-        e.printStackTrace();
-        fail("Failed to add test table:\n" + createTableSql);
-      }
-    } else if (dummyTable instanceof KuduTable) {
-      if (!Table.isExternalTable(msTbl)) {
-        fail("Failed to add table, external kudu table expected:\n" + createTableSql);
-      }
-      try {
-        KuduTable kuduTable = (KuduTable) dummyTable;
-        kuduTable.loadSchemaFromKudu();
-      } catch (ImpalaRuntimeException e) {
-        e.printStackTrace();
-        fail("Failed to add test table:\n" + createTableSql);
-      }
-    } else {
-      fail("Test table type not supported:\n" + createTableSql);
-    }
-    db.addTable(dummyTable);
-    testTables_.add(dummyTable);
-    return dummyTable;
+    return feFixture_.addTestTable(createTableSql);
   }
 
   /**
@@ -211,7 +114,7 @@ public class FrontendTestBase {
    * Returns the new view.
    */
   protected Table addTestView(String createViewSql) {
-    return addTestView(catalog_, createViewSql);
+    return feFixture_.addTestView(createViewSql);
   }
 
   /**
@@ -220,16 +123,7 @@ public class FrontendTestBase {
    * Returns the new view.
    */
   protected Table addTestView(Catalog catalog, String createViewSql) {
-    CreateViewStmt createViewStmt = (CreateViewStmt) AnalyzesOk(createViewSql);
-    Db db = catalog.getDb(createViewStmt.getDb());
-    Preconditions.checkNotNull(db, "Test views must be created in an existing db.");
-    // Do not analyze the stmt to avoid applying rewrites that would alter the view
-    // definition. We want to model real views as closely as possible.
-    QueryStmt viewStmt = (QueryStmt) ParsesOk(createViewStmt.getInlineViewDef());
-    View dummyView = View.createTestView(db, createViewStmt.getTbl(), viewStmt);
-    db.addTable(dummyView);
-    testTables_.add(dummyView);
-    return dummyView;
+    return feFixture_.addTestView(catalog, createViewSql);
   }
 
   protected Table addAllScalarTypesTestTable() {
@@ -242,12 +136,6 @@ public class FrontendTestBase {
       "char_col char (30))");
   }
 
-  protected void clearTestTables() {
-    for (Table testTable: testTables_) {
-      testTable.getDb().removeTable(testTable.getName());
-    }
-  }
-
   /**
    * Inject the hint into the pattern using hint location.
    *
@@ -263,24 +151,11 @@ public class FrontendTestBase {
     return String.format(pattern, oracleHint, defaultHint);
   }
 
-  @After
-  public void tearDown() {
-    clearTestTables();
-    clearTestDbs();
-  }
-
   /**
    * Parse 'stmt' and return the root StatementBase.
    */
   public StatementBase ParsesOk(String stmt) {
-    try {
-      StatementBase node = Parser.parse(stmt);
-      assertNotNull(node);
-      return node;
-    } catch (AnalysisException e) {
-      fail("\nParser error:\n" + e.getMessage());
-      throw new IllegalStateException(); // Keep compiler happy
-    }
+    return feFixture_.parseStmt(stmt);
   }
 
   /**
@@ -303,37 +178,24 @@ public class FrontendTestBase {
   }
 
   protected AnalysisContext createAnalysisCtx() {
-    return createAnalysisCtx(Catalog.DEFAULT_DB);
+    return feFixture_.createAnalysisCtx();
   }
 
   protected AnalysisContext createAnalysisCtx(String defaultDb) {
-    TQueryCtx queryCtx = TestUtils.createQueryContext(
-        defaultDb, System.getProperty("user.name"));
-    EventSequence timeline = new EventSequence("Frontend Test Timeline");
-    AnalysisContext analysisCtx = new AnalysisContext(queryCtx,
-        AuthorizationConfig.createAuthDisabledConfig(), timeline);
-    return analysisCtx;
+    return feFixture_.createAnalysisCtx(defaultDb);
   }
 
   protected AnalysisContext createAnalysisCtx(TQueryOptions queryOptions) {
-    TQueryCtx queryCtx = TestUtils.createQueryContext();
-    queryCtx.client_request.query_options = queryOptions;
-    EventSequence timeline = new EventSequence("Frontend Test Timeline");
-    AnalysisContext analysisCtx = new AnalysisContext(queryCtx,
-        AuthorizationConfig.createAuthDisabledConfig(), timeline);
-    return analysisCtx;
+    return feFixture_.createAnalysisCtx(queryOptions);
   }
 
   protected AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig) {
-    return createAnalysisCtx(authzConfig, System.getProperty("user.name"));
+    return feFixture_.createAnalysisCtx(authzConfig);
   }
 
   protected AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig,
       String user) {
-    TQueryCtx queryCtx = TestUtils.createQueryContext(Catalog.DEFAULT_DB, user);
-    EventSequence timeline = new EventSequence("Frontend Test Timeline");
-    AnalysisContext analysisCtx = new AnalysisContext(queryCtx, authzConfig, timeline);
-    return analysisCtx;
+    return feFixture_.createAnalysisCtx(authzConfig, user);
   }
 
   protected AnalysisContext createAnalysisCtxUsingHiveColLabels() {
@@ -347,30 +209,7 @@ public class FrontendTestBase {
    * If 'expectedWarning' is not null, asserts that a warning is produced.
    */
   public ParseNode AnalyzesOk(String stmt, AnalysisContext ctx, String expectedWarning) {
-    try {
-      AnalysisResult analysisResult = parseAndAnalyze(stmt, ctx);
-      if (expectedWarning != null) {
-        List<String> actualWarnings = analysisResult.getAnalyzer().getWarnings();
-        boolean matchedWarning = false;
-        for (String actualWarning: actualWarnings) {
-          if (actualWarning.startsWith(expectedWarning)) {
-            matchedWarning = true;
-            break;
-          }
-        }
-        if (!matchedWarning) {
-          fail(String.format("Did not produce expected warning.\n"
-                  + "Expected warning:\n%s.\nActual warnings:\n%s\nsql:\n%s",
-              expectedWarning, Joiner.on("\n").join(actualWarnings), stmt));
-        }
-      }
-      Preconditions.checkNotNull(analysisResult.getStmt());
-      return analysisResult.getStmt();
-    } catch (Exception e) {
-      e.printStackTrace();
-      fail("Error during analysis:\n" + e.toString() + "\nsql:\n" + stmt);
-    }
-    return null;
+    return feFixture_.analyzeStmt(stmt, ctx, expectedWarning);
   }
 
   /**
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalysisSessionFixture.java b/fe/src/test/java/org/apache/impala/common/QueryFixture.java
similarity index 59%
rename from fe/src/test/java/org/apache/impala/analysis/AnalysisSessionFixture.java
rename to fe/src/test/java/org/apache/impala/common/QueryFixture.java
index 900604a..88c9d57 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalysisSessionFixture.java
+++ b/fe/src/test/java/org/apache/impala/common/QueryFixture.java
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-package org.apache.impala.analysis;
+package org.apache.impala.common;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
@@ -23,15 +23,17 @@ import static org.junit.Assert.fail;
 import java.io.StringReader;
 import java.util.List;
 
+import org.apache.impala.analysis.AnalysisContext;
 import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
+import org.apache.impala.analysis.Analyzer;
+import org.apache.impala.analysis.Expr;
+import org.apache.impala.analysis.SelectStmt;
+import org.apache.impala.analysis.SqlParser;
+import org.apache.impala.analysis.SqlScanner;
+import org.apache.impala.analysis.StatementBase;
+import org.apache.impala.analysis.StmtMetadataLoader;
 import org.apache.impala.analysis.StmtMetadataLoader.StmtTableCache;
 import org.apache.impala.authorization.AuthorizationConfig;
-import org.apache.impala.catalog.Catalog;
-import org.apache.impala.common.AnalysisException;
-import org.apache.impala.common.ImpalaException;
-import org.apache.impala.common.InternalException;
-import org.apache.impala.rewrite.ExprRewriteRule;
-import org.apache.impala.service.Frontend;
 import org.apache.impala.testutil.TestUtils;
 import org.apache.impala.thrift.TQueryCtx;
 import org.apache.impala.thrift.TQueryOptions;
@@ -41,73 +43,17 @@ import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 
 /**
- * Session fixture for analyzer tests. Holds state shared across test cases such
- * as the frontend, the user, the database, and query options. Queries created
- * from this fixture start with these defaults, but each query can change them
- * as needed for that particular test case.
+ * Base class for per-query processing. This base class encapsulates all the inputs
+ * to a query: the session, context, options, db and user, as well as the input
+ * SQL. All inputs, except for the SQL, "inherit" from the session fixture, but can
+ * be overriden here. For example, if most tests use the "functional" DB, set that
+ * in the session fixture. But, if one particular test needs a different DB, you can
+ * set that here.
  *
- * This fixture is analogous to a user session. Though, unlike a real session,
- * test can change the database, options and user per-query without changing
- * the session settings.
- *
- * The session fixture is created once per test file, then query fixtures perform
- * the work needed for each particular query. It is often helpful to wrap the
- * query fixtures in a function if the same setup is used over and over.
- * See {@link ExprRewriterTest} for  example usage.
+ * Provides the parse step. Use this class directly for parse-only tests.
+ * Subclasses implement various kinds of analysis operations.
  */
-public class AnalysisSessionFixture {
-
-  /**
-   * Base class for per-query processing. This base class encapsulates all the inputs
-   * to a query: the session, context, options, db and user, as well as the input
-   * SQL. All inputs, except for the SQL, "inherit" from the session fixture, but can
-   * be overriden here. For example, if most tests use the "functional" DB, set that
-   * in the session fixture. But, if one particular test needs a different DB, you can
-   * set that here.
-   *
-   * Provides the parse step. Use this class directory for parse-only tests.
-   * Subclasses implement various kinds of analysis operations.
-   */
-  public static class QueryFixture {
-    protected final AnalysisSessionFixture session_;
-    protected final TQueryCtx queryCtx_;
-    protected final TQueryOptions queryOptions_;
-    protected String stmtSql_;
-    protected String db_;
-    protected String user_;
-
-    public QueryFixture(AnalysisSessionFixture session, String stmtSql) {
-      session_ = session;
-      stmtSql_ = stmtSql;
-      queryCtx_ = session_.queryContext();
-      queryOptions_ = session_.cloneOptions();
-      db_ = session_.db();
-      user_ = session_.user();
-    }
-
-    public void setDb(String db) { db_ = db; }
-    public void setUser(String user) { user_ = user; }
-    public TQueryCtx context() { return queryCtx_; }
-    public String stmtSql() { return stmtSql_; }
-    public TQueryOptions options() { return queryOptions_; }
-
-    protected TQueryCtx queryContext() {
-      return TestUtils.createQueryContext(db_, user_, queryOptions_);
-    }
-
-    public StatementBase parse() {
-      // TODO: Use the parser class when available
-      SqlScanner input = new SqlScanner(new StringReader(stmtSql_));
-      SqlParser parser = new SqlParser(input);
-      parser.setQueryOptions(queryOptions_);
-      try {
-        return (StatementBase) parser.parse().value;
-      } catch (Exception e) {
-        throw new IllegalStateException(e);
-      }
-    }
-  }
-
+public class QueryFixture {
   /**
    * Full query analysis, including rewrites. Use this for most tests. The
    * {@link #analyze()} method provides the decorated AST after analysis.
@@ -129,7 +75,7 @@ public class AnalysisSessionFixture {
         stmt_ = parse();
         analysisCtx_ = makeAnalysisContext();
         analysisResult_ = analysisCtx_.analyzeAndAuthorize(stmt_,
-            makeTableCache(stmt_), session_.frontend_.getAuthzChecker());
+            makeTableCache(stmt_), session_.frontend().getAuthzChecker());
         Preconditions.checkNotNull(analysisResult_.getStmt());
         return stmt_;
       } catch (AnalysisException e) {
@@ -158,7 +104,7 @@ public class AnalysisSessionFixture {
      */
     protected StmtTableCache makeTableCache(StatementBase stmt) {
       StmtMetadataLoader mdLoader =
-         new StmtMetadataLoader(session_.frontend_, db_, null);
+         new StmtMetadataLoader(session_.frontend(), db_, null);
       try {
         return mdLoader.loadTables(stmt);
       } catch (InternalException e) {
@@ -211,26 +157,6 @@ public class AnalysisSessionFixture {
    * functional.alltypes.
    */
   public static class SelectFixture extends AnalysisFixture {
-
-    /**
-     * Wraps an ExprRewriteRule to count how many times it's been applied.
-     */
-    static class CountingRewriteRuleWrapper implements ExprRewriteRule {
-      int rewrites_;
-      final ExprRewriteRule wrapped_;
-
-      CountingRewriteRuleWrapper(ExprRewriteRule wrapped) {
-        this.wrapped_ = wrapped;
-      }
-
-      @Override
-      public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
-        Expr ret = wrapped_.apply(expr, analyzer);
-        if (expr != ret) { rewrites_++; }
-        return ret;
-      }
-    }
-
     public String table_ = "functional.alltypes";
     public String exprSql_;
 
@@ -296,46 +222,41 @@ public class AnalysisSessionFixture {
     }
   }
 
-  private final Frontend frontend_;
-  // Query options to be used for all queries. Can be overriden per-query.
-  private final TQueryOptions queryOptions_;
-  // Default database for all queries.
-  private String db_ = Catalog.DEFAULT_DB;
-  // Default user for all queries.
-  private String user_ = System.getProperty("user.name");
-
-  public AnalysisSessionFixture(Frontend frontend) {
-    frontend_ = frontend;
-    queryOptions_ = new TQueryOptions();
-  }
-
-  public AnalysisSessionFixture setDB(String db) {
-    db_ = db;
-    return this;
-  }
-
-  public AnalysisSessionFixture setUser(String user) {
-    user_ = user;
-    return this;
+  protected final AnalysisSessionFixture session_;
+  protected final TQueryCtx queryCtx_;
+  protected final TQueryOptions queryOptions_;
+  protected String stmtSql_;
+  protected String db_;
+  protected String user_;
+
+  public QueryFixture(AnalysisSessionFixture session, String stmtSql) {
+    session_ = session;
+    stmtSql_ = stmtSql;
+    queryCtx_ = session_.queryContext();
+    queryOptions_ = session_.cloneOptions();
+    db_ = session_.db();
+    user_ = session_.user();
   }
 
+  public void setDb(String db) { db_ = db; }
+  public void setUser(String user) { user_ = user; }
+  public TQueryCtx context() { return queryCtx_; }
+  public String stmtSql() { return stmtSql_; }
   public TQueryOptions options() { return queryOptions_; }
-  public String db() { return db_; }
-  public String user() { return user_; }
 
-  /**
-   * Disable the optional expression rewrites.
-   */
-  public AnalysisSessionFixture disableExprRewrite() {
-    queryOptions_.setEnable_expr_rewrites(false);
-    return this;
-  }
-
-  public TQueryOptions cloneOptions() {
-    return new TQueryOptions(queryOptions_);
+  protected TQueryCtx queryContext() {
+    return TestUtils.createQueryContext(db_, user_, queryOptions_);
   }
 
-  public TQueryCtx queryContext() {
-    return TestUtils.createQueryContext(db_, user_, cloneOptions());
+  public StatementBase parse() {
+    // TODO: Use the parser class when available
+    SqlScanner input = new SqlScanner(new StringReader(stmtSql_));
+    SqlParser parser = new SqlParser(input);
+    parser.setQueryOptions(queryOptions_);
+    try {
+      return (StatementBase) parser.parse().value;
+    } catch (Exception e) {
+      throw new IllegalStateException(e);
+    }
   }
-}
+}
\ No newline at end of file
diff --git a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
index 3bbb903..e1e4433 100644
--- a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
@@ -93,6 +93,56 @@ public class CardinalityTest extends PlannerTestBase {
         "SELECT COUNT(*) FROM functional.alltypes GROUP BY bool_col", 2);
   }
 
+  /**
+   * Test tables with all-null columns. Tests the expectation from IMPALA-7310
+   * that the NDV of an all-null column should be 1.
+   */
+  @Test
+  public void testNulls() {
+    verifyCardinality("SELECT null_int FROM functional.nullrows", 26);
+    // id has unique values, so NDV = 26, cardinality = 26/26 = 1
+    verifyCardinality("SELECT null_int FROM functional.nullrows WHERE id = 'x'", 1);
+    // group_str repeats for 5 rows, so NDV=6, 26/6 =~ 4
+    verifyCardinality("SELECT null_int FROM functional.nullrows WHERE group_str = 'x'",
+        4);
+    // Revised use of nulls per IMPALA-7310
+    // null_str is all nulls, NDV = 1, selectivity = 1/1, cardinality = 26
+    // BUG: At present selectivity is assumed to be 0.1
+    //verifyCardinality(
+    //      "SELECT null_int FROM functional.nullrows WHERE null_str = 'x'", 26);
+    verifyCardinality("SELECT null_int FROM functional.nullrows WHERE null_str = 'x'",
+        3);
+  }
+
+  @Test
+  public void testGroupBy() {
+    String baseStmt = "SELECT COUNT(*) " +
+                      "FROM functional.nullrows " +
+                      "GROUP BY ";
+    // NDV(id) = 26
+    verifyCardinality(baseStmt + "id", 26);
+    // group_str has NDV=6
+    verifyCardinality(baseStmt + "group_str", 6);
+    // blank has NDV=1 (plus 1 for nulls)
+    // Bug: Nulls not counted in NDV
+    //verifyCardinality(baseStmt + "blank", 2);
+    verifyCardinality(baseStmt + "blank", 1);
+    // null_str is all nulls
+    // Bug: Nulls not counted in NDV
+    //verifyCardinality(baseStmt + "null_str", 1);
+    verifyCardinality(baseStmt + "null_str", 0);
+    // NDV(id) * NDV(null_str) = 26 * 1 = 26
+    // Bug: Nulls not counted in NDV
+    //verifyCardinality(baseStmt + "id, null_str", 26);
+    verifyCardinality(baseStmt + "id, null_str", 0);
+    // NDV(id) * NDV(group_str) = 26 * 6 = 156, capped at row count = 26
+    verifyCardinality(baseStmt + "id, group_str", 26);
+  }
+
+  /**
+   * Compute join cardinality using a table without stats, for which the planner
+   * estimates the row count. Combine with an all-nulls column.
+   */
   @Test
   public void testNullColumnJoinCardinality() throws ImpalaException {
     // IMPALA-7565: Make sure there is no division by zero during cardinality calculation
@@ -103,6 +153,43 @@ public class CardinalityTest extends PlannerTestBase {
   }
 
   /**
+   * Compute join cardinality using a table with stats.
+   * Focus on an all-nulls column.
+   */
+  @Test
+  public void testJoinWithStats() {
+    // NDV multiplied out on group by
+    verifyCardinality(
+        "SELECT null_int FROM functional.alltypes, functional.nullrows", 7300 * 26);
+    // With that as the basis, add a GROUP BY
+    String baseStmt = "SELECT COUNT(*) " +
+                      "FROM functional.alltypes, functional.nullrows " +
+                      "GROUP BY ";
+    // Unique values, one group per row
+    verifyCardinality(baseStmt + "alltypes.id", 7300);
+    // NDV(nullrows.id) = 26
+    verifyCardinality(baseStmt + "nullrows.id", 26);
+    // blank has NDV=1, but adjust for nulls
+    // Bug: Nulls not counted in NDV
+    //verifyCardinality(baseStmt + "blank", 2);
+    verifyCardinality(baseStmt + "blank", 1);
+    // group_str has NDV=6
+    verifyCardinality(baseStmt + "group_str", 6);
+    // null_str is all nulls
+    // Bug: Nulls not counted in NDV
+    //verifyCardinality(baseStmt + "null_str", 1);
+    verifyCardinality(baseStmt + "null_str", 0);
+    // NDV(nullrows.id) * NDV(null_str) = 26 * 1 = 26
+    // Bug: Nulls not counted in NDV
+    // Here and for similar bugs: see IMPALA-7310 and IMPALA-8094
+    //verifyCardinality(baseStmt + "id, null_str", 26);
+    verifyCardinality(baseStmt + "nullrows.id, null_str", 0);
+    // NDV(nullrows.id) * NDV(group_str) = 26 * 6 = 156
+    // Planner does not know that id determines group_str
+    verifyCardinality(baseStmt + "nullrows.id, group_str", 156);
+  }
+
+  /**
    * Joins should multiply out cardinalities.
    */
   @Test
diff --git a/testdata/NullRows/data.csv b/testdata/NullRows/data.csv
new file mode 100644
index 0000000..9d43668
--- /dev/null
+++ b/testdata/NullRows/data.csv
@@ -0,0 +1,26 @@
+a,,\N,\N,\N,a,a,true
+b,,\N,\N,\N,a,\N,false
+c,,\N,\N,\N,a,\N,\N
+d,,\N,\N,\N,a,\N,\N
+e,,\N,\N,\N,a,\N,\N
+f,,\N,\N,\N,f,f,true
+g,,\N,\N,\N,f,\N,false
+h,,\N,\N,\N,f,\N,\N
+i,,\N,\N,\N,f,\N,\N
+j,,\N,\N,\N,f,\N,\N
+k,,\N,\N,\N,k,k,true
+l,,\N,\N,\N,k,\N,false
+m,,\N,\N,\N,k,\N,\N
+n,,\N,\N,\N,k,\N,\N
+o,,\N,\N,\N,k,\N,\N
+p,,\N,\N,\N,p,p,true
+q,,\N,\N,\N,p,\N,false
+r,,\N,\N,\N,p,\N,\N
+s,,\N,\N,\N,p,\N,\N
+t,,\N,\N,\N,p,\N,\N
+u,,\N,\N,\N,u,u,true
+v,,\N,\N,\N,u,\N,false
+w,,\N,\N,\N,u,\N,\N
+x,,\N,\N,\N,u,\N,\N
+y,,\N,\N,\N,u,\N,\N
+z,,\N,\N,\N,z,z,true
diff --git a/testdata/bin/compute-table-stats.sh b/testdata/bin/compute-table-stats.sh
index d6e6d22..5bff6b1 100755
--- a/testdata/bin/compute-table-stats.sh
+++ b/testdata/bin/compute-table-stats.sh
@@ -33,7 +33,7 @@ COMPUTE_STATS_SCRIPT="${IMPALA_HOME}/tests/util/compute_table_stats.py --impalad
 # Run compute stats over as many of the tables used in the Planner tests as possible.
 ${COMPUTE_STATS_SCRIPT} --db_names=functional\
     --table_names="alltypes,alltypesagg,alltypesaggmultifilesnopart,alltypesaggnonulls,
-    alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey,nulltable"
+    alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey,nulltable,nullrows"
 
 # We cannot load HBase on s3 and isilon yet.
 if [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index a21bdc8..9cdf3d1 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -1350,6 +1350,30 @@ create table {db_name}{db_suffix}.{table_name} (
 partition by hash(a) partitions 3 stored as kudu;
 ====
 ---- DATASET
+-- Table with varying ratios of nulls. Used to test NDV with nulls.
+-- Also useful for testing null counts, since the null count varies
+-- from zero, to some, to all rows.
+functional
+---- BASE_TABLE_NAME
+nullrows
+---- COLUMNS
+id string
+blank string
+null_str string
+null_int int
+null_double double
+group_str string
+some_nulls string
+bool_nulls boolean
+---- ROW_FORMAT
+delimited fields terminated by ','
+---- DEPENDENT_LOAD
+INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} select * from functional.nullrows;
+---- LOAD
+LOAD DATA LOCAL INPATH '{impala_home}/testdata/NullRows/data.csv'
+OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
+====
+---- DATASET
 functional
 ---- BASE_TABLE_NAME
 nullescapedtable
diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv
index 1ce6ce3..bc43297 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -203,6 +203,7 @@ table_name:tinytable, constraint:only, table_format:kudu/none/none
 table_name:tinyinttable, constraint:only, table_format:kudu/none/none
 table_name:zipcode_incomes, constraint:only, table_format:kudu/none/none
 table_name:nulltable, constraint:only, table_format:kudu/none/none
+table_name:nullrows, constraint:only, table_format:kudu/none/none
 table_name:nullescapedtable, constraint:only, table_format:kudu/none/none
 table_name:decimal_tbl, constraint:only, table_format:kudu/none/none
 table_name:decimal_rtf_tbl, constraint:only, table_format:kudu/none/none
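
The expected values in the CardinalityTest changes above all follow from two
simple planner estimates: an equality predicate keeps roughly rows / NDV(col)
rows, and a GROUP BY produces about the product of the grouping columns' NDVs,
capped at the input row count. A minimal C++ sketch of that arithmetic
(illustrative only; the function names are invented here, and Impala's real
estimator lives in the Java frontend planner):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // "WHERE col = const" keeps roughly rows / NDV(col) rows.
    int64_t EqPredCardinality(int64_t rows, int64_t ndv) {
      return std::max<int64_t>(1, rows / std::max<int64_t>(1, ndv));
    }

    // "GROUP BY c1, c2" yields about NDV(c1) * NDV(c2) groups, capped at the
    // input row count.
    int64_t GroupByCardinality(int64_t rows, int64_t ndv1, int64_t ndv2) {
      return std::min(rows, ndv1 * ndv2);
    }

    int main() {
      // 26-row nullrows table, id is unique: 26 / 26 = 1.
      std::printf("%lld\n", (long long) EqPredCardinality(26, 26));
      // GROUP BY id, group_str: 26 * 6 = 156, capped at 26 input rows.
      std::printf("%lld\n", (long long) GroupByCardinality(26, 26, 6));
      // After the 7300 x 26 cross join the cap no longer binds: 156.
      std::printf("%lld\n", (long long) GroupByCardinality(7300LL * 26, 26, 6));
      return 0;
    }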


[impala] 06/09: sys/types.h no longer includes sys/sysmacros.h

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9bd3ba7637fb339f2f9cf34ed65327f6fd122c88
Author: Hector Acosta <he...@cloudera.com>
AuthorDate: Fri Feb 8 14:50:01 2019 -0800

    sys/types.h no longer includes sys/sysmacros.h
    
    Change-Id: Ibf2cf9e4a211a6bc1b292c771386ca66979618eb
    Reviewed-on: http://gerrit.cloudera.org:8080/12419
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/util/disk-info.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/be/src/util/disk-info.cc b/be/src/util/disk-info.cc
index eba4f26..0a383c8 100644
--- a/be/src/util/disk-info.cc
+++ b/be/src/util/disk-info.cc
@@ -23,6 +23,7 @@
 #include <sys/vfs.h>
 #endif
 #include <sys/types.h>
+#include <sys/sysmacros.h>
 #include <sys/stat.h>
 #include <unistd.h>
 

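The one-line fix above is needed because newer glibc stopped including
<sys/sysmacros.h> from <sys/types.h>, so code that uses the major()/minor()
device-number macros must include it explicitly. A standalone sketch of the
pattern (illustrative only, not the Impala source):

    #include <sys/sysmacros.h>  // no longer implied by <sys/types.h>
    #include <sys/stat.h>
    #include <cstdio>

    int main() {
      struct stat st;
      if (stat("/", &st) != 0) return 1;
      // major()/minor() are macros provided by <sys/sysmacros.h> on Linux;
      // without the explicit include this fails to compile on newer glibc.
      std::printf("root device: major=%u minor=%u\n",
                  major(st.st_dev), minor(st.st_dev));
      return 0;
    }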

[impala] 09/09: IMPALA-5031: `uint8_t & int` type is int

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8b8d935a0045aba5caea4106a42fdc647a2a6147
Author: Jim Apple <jb...@apache.org>
AuthorDate: Sat Feb 2 16:30:38 2019 -0800

    IMPALA-5031: `uint8_t & int` type is int
    
    UBSAN finds the following in ParquetBoolDecoder.TestDecodeAndSkipping:
    
    util/bit-stream-utils.inline.h:156:25: runtime error: left shift of 42 by 28 places cannot be represented in type 'int'
        #0 BatchedBitReader::GetUleb128Int(unsigned int*) util/bit-stream-utils.inline.h:156:25
        #1 RleBatchDecoder<bool>::NextCounts() util/rle-encoding.h:778:40
        #2 RleBatchDecoder<bool>::NextNumRepeats() util/rle-encoding.h:622:28
        #3 RleBatchDecoder<bool>::GetValues(int, bool*) util/rle-encoding.h:858:27
        #4 bool ParquetBoolDecoder::DecodeValue<(parquet::Encoding::type)3>(bool*) exec/parquet/parquet-bool-decoder.h:85:24
        #5 TestSkipping(parquet::Encoding::type, unsigned char*, int, vector<bool> const&, int, int)::$_0::operator()() const exec/parquet/parquet-bool-decoder-test.cc:59
        #6 TestSkipping(parquet::Encoding::type, unsigned char*, int, vector<bool> const&, int, int) exec/parquet/parquet-bool-decoder-test.cc:69:221
        #7 ParquetBoolDecoder_TestDecodeAndSkipping_Test::TestBody() exec/parquet/parquet-bool-decoder-test.cc:85:5
        #9 testing::Test::Run() (/home/ubuntu/Impala/be/build/debug/exec/parquet/parquet-bool-decoder-test+0x6ee4f09)
    
    The problem is the line
    
        *v |= (byte & 0x7F) << shift;
    
    byte is a uint8_t and 0x7F is an int. The standard section
    [expr.bit.and] then applies the "usual arithmetic conversions"
    specified in [expr], which applies "if the type of the operand with
    signed integer type can represent all of the values of the type of the
    operand with unsigned integer type, the operand with unsigned integer
    type shall be converted to the type of the operand with signed integer
    type." That makes byte & 0x7F a signed integer type, and [expr.shift]
    says that "if E1 has a signed type and non-negative value, and E1×2^E2
    is representable in the corresponding unsigned type of the result
    type, then that value, converted to the result type, is the resulting
    value; otherwise, the behavior is undefined."
    
    Change-Id: Ie6e0b956751090f3f8aadd6783b5e06e55e57abe
    Reviewed-on: http://gerrit.cloudera.org:8080/12346
    Reviewed-by: Jim Apple <jb...@apache.org>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/util/bit-stream-utils.inline.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/be/src/util/bit-stream-utils.inline.h b/be/src/util/bit-stream-utils.inline.h
index 08f1700..18df34c 100644
--- a/be/src/util/bit-stream-utils.inline.h
+++ b/be/src/util/bit-stream-utils.inline.h
@@ -153,7 +153,10 @@ inline bool BatchedBitReader::GetUleb128Int(uint32_t* v) {
   do {
     if (UNLIKELY(shift >= MAX_VLQ_BYTE_LEN * 7)) return false;
     if (!GetBytes(1, &byte)) return false;
-    *v |= (byte & 0x7F) << shift;
+    // The constant below must be explicitly unsigned to ensure that the result of the
+    // bitwise-and is unsigned, so that the left shift is always defined behavior under
+    // the C++ standard.
+    *v |= (byte & 0x7Fu) << shift;
     shift += 7;
   } while ((byte & 0x80) != 0);
   return true;
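
A standalone sketch of the promotion problem and of why the unsigned constant
fixes it (illustrative only, not the Impala source):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint8_t byte = 42;  // low 7 bits of the final ULEB128 group
      int shift = 28;
      // (byte & 0x7F): byte promotes to int and 0x7F is already an int, so
      // the result is a signed int. 42 << 28 does not fit in a 32-bit int,
      // so the shift is undefined behavior (this is what UBSAN reported):
      //   uint32_t bad = (byte & 0x7F) << shift;
      // With an unsigned constant the operands convert to unsigned int, and
      // left shifts of unsigned values are defined, wrapping modulo 2^32:
      uint32_t good = (byte & 0x7Fu) << shift;
      std::printf("%u\n", good);  // 2684354560 == (42 * 2^28) mod 2^32
      return 0;
    }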


[impala] 02/09: IMPALA-8175: improve tests_minicluster_obj

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit df83d562e2a277c87fd603e81b66bf5fcdb1d6ea
Author: Michael Brown <mi...@apache.org>
AuthorDate: Fri Feb 8 09:05:46 2019 -0800

    IMPALA-8175: improve tests_minicluster_obj
    
    Adjust minicluster impalad pgrep detection usage to be compatible with
    CentOS 6 pgrep. Skip the test if not in a minicluster, because the test
    will fail. Don't run the test in exhaustive: it's most important that
    this test runs pre-merge, which uses the core exploration strategy.
    
    I was able to use sh -c "pgrep ..." and impala-py.test to test this
    locally on CentOS 6 and Ubuntu 16.
    
    Change-Id: I558b3157bb16ef3d169c0d3e795e03700a17ffe4
    Reviewed-on: http://gerrit.cloudera.org:8080/12412
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/comparison/cluster.py      |  6 ++++--
 tests/infra/test_stress_infra.py | 10 +++++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/tests/comparison/cluster.py b/tests/comparison/cluster.py
index a2d8fc0..33f29a9 100644
--- a/tests/comparison/cluster.py
+++ b/tests/comparison/cluster.py
@@ -845,8 +845,10 @@ class MiniClusterImpalad(Impalad):
 
   def find_pid(self):
     # Need to filter results to avoid pgrep picking up its parent bash script.
-    pid = self.shell("pgrep -f -a 'impalad.*%s' | grep -v pgrep | "
-        "grep -o '^[0-9]*' || true" % self.hs2_port)
+    # Test with:
+    # sh -c "pgrep -l -f 'impala.*21050' | grep [i]mpalad | grep -o '^[0-9]*' || true"
+    pid = self.shell("pgrep -l -f 'impalad.*%s' | grep [i]mpalad | "
+                     "grep -o '^[0-9]*' || true" % self.hs2_port)
     if pid:
       return int(pid)
 
diff --git a/tests/infra/test_stress_infra.py b/tests/infra/test_stress_infra.py
index a11116f..bdbcdc7 100644
--- a/tests/infra/test_stress_infra.py
+++ b/tests/infra/test_stress_infra.py
@@ -17,13 +17,14 @@
 
 # This module attempts to enforce infrastructural assumptions that bind test tools to
 # product or other constraints. We want to stop these assumptions from breaking at
-# pre-commit time, not later.
+# pre-merge time, not later.
 
 import pytest
 
 from decimal import Decimal
 
 from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.common.skip import SkipIfBuildType
 from tests.comparison.cluster import MiniCluster
 from tests.util.parse_util import (
     EXPECTED_TPCDS_QUERIES_COUNT, EXPECTED_TPCH_NESTED_QUERIES_COUNT,
@@ -34,6 +35,10 @@ from tests.util.filesystem_utils import IS_LOCAL
 
 class TestStressInfra(ImpalaTestSuite):
 
+  @classmethod
+  def get_workload(cls):
+    return 'functional-query'
+
   def test_stress_binary_search_start_point(self):
     """
     Test that the stress test can use EXPLAIN to find the start point for its binary
@@ -62,10 +67,13 @@ class TestStressInfra(ImpalaTestSuite):
     for name in queries:
       assert name is not None
 
+  @SkipIfBuildType.remote
   def tests_minicluster_obj(self):
     """
     Test that the minicluster abstraction finds the minicluster.
     """
+    if self.exploration_strategy() == "exhaustive":
+      pytest.skip("Test does not need to run in exhaustive")
     cluster = MiniCluster()
     if IS_LOCAL:
       expected_pids = 1


[impala] 07/09: IMPALA-5043: diagnostics for topic staleness in AC

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c3c69ae362ae06adcf304b8610221a656e7b2eba
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Thu Feb 7 16:50:54 2019 -0800

    IMPALA-5043: diagnostics for topic staleness in AC
    
    The default threshold for the admission control topic to be considered
    stale is 5s.
    
    Adds diagnostics for stale topic updates:
    * A banner on the /admission web UI if the topic is considered stale.
    * Time since last update on the /admission web UI
    * Append a warning to rejection/queuing messages where topic staleness
      may have affected the decision.
    * Append a warning to profiles of admitted queries where the topic was
      stale at the time the query was admitted.
    * Include the topic staleness in all profiles of admitted queries
    
    Testing:
    Add a custom cluster test that kills the statestore, validates that
    admission control behaves as expected and that staleness warnings
    show up in the appropriate places.
    
    Change-Id: Ib9e26adb6419589ccf7625e423356df45bee4ac9
    Reviewed-on: http://gerrit.cloudera.org:8080/12407
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/scheduling/admission-controller.cc         | 91 ++++++++++++++++++-----
 be/src/scheduling/admission-controller.h          | 19 ++++-
 be/src/service/impala-http-handler.cc             | 20 +++--
 tests/common/impala_test_suite.py                 | 17 ++++-
 tests/custom_cluster/test_admission_controller.py | 79 +++++++++++++++++++-
 www/admission_controller.tmpl                     | 16 +++-
 6 files changed, 208 insertions(+), 34 deletions(-)

diff --git a/be/src/scheduling/admission-controller.cc b/be/src/scheduling/admission-controller.cc
index 77e6d4b..ace09e5 100644
--- a/be/src/scheduling/admission-controller.cc
+++ b/be/src/scheduling/admission-controller.cc
@@ -39,6 +39,18 @@ using namespace strings;
 DEFINE_int64(queue_wait_timeout_ms, 60 * 1000, "Maximum amount of time (in "
     "milliseconds) that a request will wait to be admitted before timing out.");
 
+// The stale topic warning threshold is made configurable to allow suppressing the
+// error if it turns out to be noisy on some deployments or allow lowering the
+// threshold to help debug admission control issues. Hidden so that we have the
+// option of making this a no-op later.
+DEFINE_int64_hidden(admission_control_stale_topic_threshold_ms, 5 * 1000,
+    "Threshold above which the admission controller will append warnings to "
+    "error messages and profiles warning that the admission control topic is "
+    "stale so that the admission control decision may have been based on stale "
+    "state data. The default, 5 seconds, is chosen to minimise false positives but "
+    "capture most cases where the Impala daemon is disconnected from the statestore "
+    "or topic updates are seriously delayed.");
+
 namespace impala {
 
 const int64_t AdmissionController::PoolStats::HISTOGRAM_NUM_OF_BINS = 128;
@@ -124,6 +136,10 @@ const string AdmissionController::PROFILE_INFO_KEY_LAST_QUEUED_REASON =
     "Latest admission queue reason";
 const string AdmissionController::PROFILE_INFO_KEY_ADMITTED_MEM =
     "Cluster Memory Admitted";
+const string AdmissionController::PROFILE_INFO_KEY_STALENESS_WARNING =
+    "Admission control state staleness";
+const string AdmissionController::PROFILE_TIME_SINCE_LAST_UPDATE_COUNTER_NAME =
+    "AdmissionControlTimeSinceLastUpdate";
 
 // Error status string details
 const string REASON_MEM_LIMIT_TOO_LOW_FOR_RESERVATION =
@@ -145,7 +161,8 @@ const string REASON_MIN_RESERVATION_OVER_POOL_MEM =
 const string REASON_DISABLED_MAX_MEM_RESOURCES =
     "disabled by pool max mem resources set to 0";
 const string REASON_DISABLED_REQUESTS_LIMIT = "disabled by requests limit set to 0";
-const string REASON_QUEUE_FULL = "queue full, limit=$0, num_queued=$1";
+// $2 is the staleness detail.
+const string REASON_QUEUE_FULL = "queue full, limit=$0, num_queued=$1.$2";
 const string REASON_REQ_OVER_POOL_MEM =
     "request memory needed $0 is greater than pool max mem resources $1.\n\n"
     "Use the MEM_LIMIT query option to indicate how much memory is required per node. "
@@ -163,17 +180,19 @@ const string REASON_THREAD_RESERVATION_AGG_LIMIT_EXCEEDED =
     "THREAD_RESERVATION_AGGREGATE_LIMIT query option value: $1 > $2.";
 
 // Queue decision details
-// $0 = num running queries, $1 = num queries limit
-const string QUEUED_NUM_RUNNING = "number of running queries $0 is at or over limit $1";
-// $0 = queue size
+// $0 = num running queries, $1 = num queries limit, $2 = staleness detail
+const string QUEUED_NUM_RUNNING =
+    "number of running queries $0 is at or over limit $1.$2";
+// $0 = queue size, $1 = staleness detail
 const string QUEUED_QUEUE_NOT_EMPTY = "queue is not empty (size $0); queued queries are "
-    "executed first";
-// $0 = pool name, $1 = pool max memory, $2 = pool mem needed, $3 = pool mem available
+    "executed first.$1";
+// $0 = pool name, $1 = pool max memory, $2 = pool mem needed, $3 = pool mem available,
+// $4 = staleness detail
 const string POOL_MEM_NOT_AVAILABLE = "Not enough aggregate memory available in pool $0 "
-    "with max mem resources $1. Needed $2 but only $3 was available.";
-// $0 = host name, $1 = host mem needed, $3 = host mem available
+    "with max mem resources $1. Needed $2 but only $3 was available.$4";
+// $0 = host name, $1 = host mem needed, $2 = host mem available, $3 = host admit mem limit, $4 = staleness detail
 const string HOST_MEM_NOT_AVAILABLE = "Not enough memory available on host $0."
-    "Needed $1 but only $2 out of $3 was available.";
+    "Needed $1 but only $2 out of $3 was available.$4";
 
 // Parses the pool name and backend_id from the topic key if it is valid.
 // Returns true if the topic key is valid and pool_name and backend_id are set.
@@ -265,10 +284,9 @@ Status AdmissionController::Init() {
   auto cb = [this](
       const StatestoreSubscriber::TopicDeltaMap& state,
       vector<TTopicDelta>* topic_updates) { UpdatePoolStats(state, topic_updates); };
-  Status status =
-      subscriber_->AddTopic(Statestore::IMPALA_REQUEST_QUEUE_TOPIC,
-          /* is_transient=*/ true, /* populate_min_subscriber_topic_version=*/ false,
-          /* filter_prefix=*/"", cb);
+  Status status = subscriber_->AddTopic(Statestore::IMPALA_REQUEST_QUEUE_TOPIC,
+      /* is_transient=*/true, /* populate_min_subscriber_topic_version=*/false,
+      /* filter_prefix=*/"", cb);
   if (!status.ok()) {
     status.AddDetail("AdmissionController failed to register request queue topic");
   }
@@ -397,7 +415,8 @@ bool AdmissionController::HasAvailableMemResources(const QuerySchedule& schedule
   if (stats->EffectiveMemReserved() + cluster_mem_to_admit > pool_max_mem) {
     *mem_unavailable_reason = Substitute(POOL_MEM_NOT_AVAILABLE, pool_name,
         PrintBytes(pool_max_mem), PrintBytes(cluster_mem_to_admit),
-        PrintBytes(max(pool_max_mem - stats->EffectiveMemReserved(), 0L)));
+        PrintBytes(max(pool_max_mem - stats->EffectiveMemReserved(), 0L)),
+        GetStalenessDetailLocked(" "));
     return false;
   }
 
@@ -418,7 +437,7 @@ bool AdmissionController::HasAvailableMemResources(const QuerySchedule& schedule
       *mem_unavailable_reason =
           Substitute(HOST_MEM_NOT_AVAILABLE, host_id, PrintBytes(per_host_mem_to_admit),
               PrintBytes(max(admit_mem_limit - effective_host_mem_reserved, 0L)),
-              PrintBytes(admit_mem_limit));
+              PrintBytes(admit_mem_limit), GetStalenessDetailLocked(" "));
       return false;
     }
   }
@@ -449,12 +468,12 @@ bool AdmissionController::CanAdmitRequest(const QuerySchedule& schedule,
   PoolStats* stats = GetPoolStats(pool_name);
   if (!admit_from_queue && stats->local_stats().num_queued > 0) {
     *not_admitted_reason = Substitute(QUEUED_QUEUE_NOT_EMPTY,
-        stats->local_stats().num_queued);
+        stats->local_stats().num_queued, GetStalenessDetailLocked(" "));
     return false;
   } else if (pool_cfg.max_requests >= 0 &&
       stats->agg_num_running() >= pool_cfg.max_requests) {
     *not_admitted_reason = Substitute(QUEUED_NUM_RUNNING, stats->agg_num_running(),
-        pool_cfg.max_requests);
+        pool_cfg.max_requests, GetStalenessDetailLocked(" "));
     return false;
   } else if (!HasAvailableMemResources(schedule, pool_cfg, not_admitted_reason)) {
     return false;
@@ -565,7 +584,7 @@ bool AdmissionController::RejectImmediately(const QuerySchedule& schedule,
   PoolStats* stats = GetPoolStats(schedule.request_pool());
   if (stats->agg_num_queued() >= pool_cfg.max_queued) {
     *rejection_reason = Substitute(REASON_QUEUE_FULL, pool_cfg.max_queued,
-        stats->agg_num_queued());
+        stats->agg_num_queued(), GetStalenessDetailLocked(" "));
     return true;
   }
 
@@ -763,6 +782,7 @@ void AdmissionController::UpdatePoolStats(
       HandleTopicUpdates(delta.topic_entries);
     }
     UpdateClusterAggregates();
+    last_topic_update_time_ms_ = MonotonicMillis();
   }
   dequeue_cv_.NotifyOne(); // Dequeue and admit queries on the dequeue thread
 }
@@ -968,7 +988,8 @@ void AdmissionController::DequeueLoop() {
         if (total_available <= 0) {
           if (!queue.empty()) {
             LogDequeueFailed(queue.head(),
-                Substitute(QUEUED_NUM_RUNNING, stats->agg_num_running(), max_requests));
+                Substitute(QUEUED_NUM_RUNNING, stats->agg_num_running(), max_requests,
+                    GetStalenessDetailLocked(" ")));
           }
           continue;
         }
@@ -1077,6 +1098,38 @@ void AdmissionController::AdmitQuery(QuerySchedule* schedule, bool was_queued) {
       PROFILE_INFO_KEY_ADMISSION_RESULT, admission_result);
   schedule->summary_profile()->AddInfoString(
       PROFILE_INFO_KEY_ADMITTED_MEM, PrintBytes(schedule->GetClusterMemoryToAdmit()));
+  // We may have admitted based on stale information. Include a warning in the profile
+  // if this may be the case.
+  int64_t time_since_update_ms;
+  string staleness_detail = GetStalenessDetailLocked("", &time_since_update_ms);
+  COUNTER_SET(ADD_COUNTER(schedule->summary_profile(),
+      PROFILE_TIME_SINCE_LAST_UPDATE_COUNTER_NAME, TUnit::TIME_MS), time_since_update_ms);
+  if (!staleness_detail.empty()) {
+    schedule->summary_profile()->AddInfoString(
+        PROFILE_INFO_KEY_STALENESS_WARNING, staleness_detail);
+  }
+
+}
+
+string AdmissionController::GetStalenessDetail(const string& prefix,
+    int64_t* ms_since_last_update) {
+  lock_guard<mutex> lock(admission_ctrl_lock_);
+  return GetStalenessDetailLocked(prefix, ms_since_last_update);
+}
+
+string AdmissionController::GetStalenessDetailLocked(const string& prefix,
+    int64_t* ms_since_last_update) {
+  int64_t ms_since_update = MonotonicMillis() - last_topic_update_time_ms_;
+  if (ms_since_last_update != nullptr) *ms_since_last_update = ms_since_update;
+  if (last_topic_update_time_ms_ == 0) {
+    return Substitute("$0Warning: admission control information from statestore "
+                      "is stale: no update has been received.", prefix);
+  } else if (ms_since_update >= FLAGS_admission_control_stale_topic_threshold_ms) {
+    return Substitute("$0Warning: admission control information from statestore "
+                      "is stale: $1 since last update was received.",
+        prefix, PrettyPrinter::Print(ms_since_update, TUnit::TIME_MS));
+  }
+  return "";
 }
 
 void AdmissionController::PoolToJsonLocked(const string& pool_name,
diff --git a/be/src/scheduling/admission-controller.h b/be/src/scheduling/admission-controller.h
index c13d65f..14d59a9 100644
--- a/be/src/scheduling/admission-controller.h
+++ b/be/src/scheduling/admission-controller.h
@@ -226,6 +226,8 @@ class AdmissionController {
   static const string PROFILE_INFO_VAL_INITIAL_QUEUE_REASON;
   static const string PROFILE_INFO_KEY_LAST_QUEUED_REASON;
   static const string PROFILE_INFO_KEY_ADMITTED_MEM;
+  static const string PROFILE_INFO_KEY_STALENESS_WARNING;
+  static const string PROFILE_TIME_SINCE_LAST_UPDATE_COUNTER_NAME;
 
   AdmissionController(StatestoreSubscriber* subscriber,
       RequestPoolService* request_pool_service, MetricGroup* metrics,
@@ -277,6 +279,13 @@ class AdmissionController {
   void PopulatePerHostMemReservedAndAdmitted(
       std::unordered_map<std::string, std::pair<int64_t, int64_t>>* mem_map);
 
+  /// Returns a non-empty string with a warning if the admission control data is stale.
+  /// 'prefix' is added to the start of the string. Returns an empty string if not stale.
+  /// If 'ms_since_last_update' is non-null, set it to the time in ms since last update.
+  /// Caller must not hold 'admission_ctrl_lock_'.
+  std::string GetStalenessDetail(const std::string& prefix,
+      int64_t* ms_since_last_update = nullptr);
+
  private:
   class PoolStats;
   friend class PoolStats;
@@ -304,6 +313,10 @@ class AdmissionController {
   /// Protects all access to all variables below.
   boost::mutex admission_ctrl_lock_;
 
+  /// The last time a topic update was processed. Time is obtained from
+  /// MonotonicMillis(), or is 0 if an update was never received.
+  int64_t last_topic_update_time_ms_ = 0;
+
   /// Maps from host id to memory reserved and memory admitted, both aggregates over all
   /// pools. See the class doc for a detailed definition of reserved and admitted.
   /// Protected by admission_ctrl_lock_.
@@ -609,8 +622,12 @@ class AdmissionController {
   /// Is a helper method used by both PoolToJson() and AllPoolsToJson()
   void PoolToJsonLocked(const string& pool_name, rapidjson::Value* resource_pools,
       rapidjson::Document* document);
+
+  /// Same as GetStalenessDetail() except caller must hold 'admission_ctrl_lock_'.
+  std::string GetStalenessDetailLocked(const std::string& prefix,
+      int64_t* ms_since_last_update = nullptr);
 };
 
-}
+} // namespace impala
 
 #endif // SCHEDULING_ADMISSION_CONTROLLER_H
diff --git a/be/src/service/impala-http-handler.cc b/be/src/service/impala-http-handler.cc
index 4a55e66..b3145bf 100644
--- a/be/src/service/impala-http-handler.cc
+++ b/be/src/service/impala-http-handler.cc
@@ -880,15 +880,14 @@ void ImpalaHttpHandler::BackendsHandler(const Webserver::ArgumentMap& args,
 
 void ImpalaHttpHandler::AdmissionStateHandler(
     const Webserver::ArgumentMap& args, Document* document) {
+  AdmissionController* ac = ExecEnv::GetInstance()->admission_controller();
   Webserver::ArgumentMap::const_iterator pool_name_arg = args.find("pool_name");
   bool get_all_pools = (pool_name_arg == args.end());
   Value resource_pools(kArrayType);
-  if(get_all_pools){
-    ExecEnv::GetInstance()->admission_controller()->AllPoolsToJson(
-        &resource_pools, document);
+  if (get_all_pools) {
+    ac->AllPoolsToJson(&resource_pools, document);
   } else {
-    ExecEnv::GetInstance()->admission_controller()->PoolToJson(
-        pool_name_arg->second, &resource_pools, document);
+    ac->PoolToJson(pool_name_arg->second, &resource_pools, document);
   }
 
   // Now get running queries from CRS map.
@@ -938,6 +937,9 @@ void ImpalaHttpHandler::AdmissionStateHandler(
     resource_pools[i].GetObject().AddMember(
         "running_queries", queries_in_pool, document->GetAllocator());
   }
+  int64_t ms_since_last_statestore_update;
+  string staleness_detail = ac->GetStalenessDetail("", &ms_since_last_statestore_update);
+
   // In order to embed a plain json inside the webpage generated by mustache, we need
   // to stringify it and write it out as a json element.
   rapidjson::StringBuffer strbuf;
@@ -946,6 +948,14 @@ void ImpalaHttpHandler::AdmissionStateHandler(
   Value raw_json(strbuf.GetString(), document->GetAllocator());
   document->AddMember("resource_pools_plain_json", raw_json, document->GetAllocator());
   document->AddMember("resource_pools", resource_pools, document->GetAllocator());
+  document->AddMember("statestore_admission_control_time_since_last_update_ms",
+      ms_since_last_statestore_update, document->GetAllocator());
+  if (!staleness_detail.empty()) {
+    Value staleness_detail_json(staleness_detail.c_str(), document->GetAllocator());
+    document->AddMember("statestore_update_staleness_detail", staleness_detail_json,
+        document->GetAllocator());
+  }
+
   // Indicator that helps render UI elements based on this condition.
   document->AddMember("get_all_pools", get_all_pools, document->GetAllocator());
 }
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index f7df218..06299e4 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -842,14 +842,23 @@ class ImpalaTestSuite(BaseTestSuite):
     """Waits for the given 'query_handle' to reach the 'expected_state'. If it does not
     reach the given state within 'timeout' seconds, the method throws an AssertionError.
     """
+    self.wait_for_any_state(handle, [expected_state], timeout)
+
+  def wait_for_any_state(self, handle, expected_states, timeout):
+    """Waits for the given 'query_handle' to reach one of 'expected_states'. If it does
+    not reach one of the given states within 'timeout' seconds, the method throws an
+    AssertionError. Returns the final state.
+    """
     start_time = time.time()
     actual_state = self.client.get_state(handle)
-    while actual_state != expected_state and time.time() - start_time < timeout:
+    while actual_state not in expected_states and time.time() - start_time < timeout:
       actual_state = self.client.get_state(handle)
       time.sleep(0.5)
-    if actual_state != expected_state:
-      raise Timeout("query '%s' did not reach expected state '%s', last known state '%s'"
-                    % (handle.get_handle().id, expected_state, actual_state))
+    if actual_state not in expected_states:
+      raise Timeout("query {0} did not reach one of the expected states {1}, "
+                    "last known state {2}".format(handle.get_handle().id, expected_states,
+                    actual_state))
+    return actual_state
 
   def assert_impalad_log_contains(self, level, line_regex, expected_count=1):
     """
diff --git a/tests/custom_cluster/test_admission_controller.py b/tests/custom_cluster/test_admission_controller.py
index c28b3ef..7c97c5e 100644
--- a/tests/custom_cluster/test_admission_controller.py
+++ b/tests/custom_cluster/test_admission_controller.py
@@ -147,6 +147,9 @@ QUERY_END_BEHAVIORS = ['EOS', 'CLIENT_CANCEL', 'QUERY_TIMEOUT', 'CLIENT_CLOSE']
 # The timeout used for the QUERY_TIMEOUT end behaviour
 QUERY_END_TIMEOUT_S = 1
 
+# Value used for --admission_control_stale_topic_threshold_ms in tests.
+STALE_TOPIC_THRESHOLD_MS = 500
+
 # Regex that matches the first part of the profile info string added when a query is
 # queued.
 INITIAL_QUEUE_REASON_REGEX = \
@@ -267,12 +270,19 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite):
     HS2TestSuite.check_response(get_profile_resp)
     self.__check_query_options(get_profile_resp.profile, expected_options)
 
-  def _execute_and_collect_profiles(self, queries, timeout_s, config_options={}):
+  def _execute_and_collect_profiles(self, queries, timeout_s, config_options={},
+      allow_query_failure=False):
     """Submit the query statements in 'queries' in parallel to the first impalad in
     the cluster. After submission, the results are fetched from the queries in
     sequence and their profiles are collected. Wait for up to timeout_s for
-    each query to finish. Returns the profile strings."""
+    each query to finish. If 'allow_query_failure' is True, succeeds if the query
+    completes successfully or ends up in the EXCEPTION state. Otherwise expects the
+    queries to complete successfully.
+    Returns the profile strings."""
     client = self.cluster.impalads[0].service.create_beeswax_client()
+    expected_states = [client.QUERY_STATES['FINISHED']]
+    if allow_query_failure:
+      expected_states.append(client.QUERY_STATES['EXCEPTION'])
     try:
       handles = []
       profiles = []
@@ -280,8 +290,9 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite):
       for query in queries:
         handles.append(client.execute_async(query))
       for query, handle in zip(queries, handles):
-        self.wait_for_state(handle, client.QUERY_STATES['FINISHED'], timeout_s)
-        self.client.fetch(query, handle)
+        state = self.wait_for_any_state(handle, expected_states, timeout_s)
+        if state == client.QUERY_STATES['FINISHED']:
+          self.client.fetch(query, handle)
         profiles.append(self.client.get_runtime_profile(handle))
       return profiles
     finally:
@@ -850,6 +861,66 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite):
     # Close the queued query.
     self.close(queued_query_resp.operationHandle)
 
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(
+      impalad_args=impalad_admission_ctrl_flags(max_requests=1, max_queued=3,
+          pool_max_mem=1024 * 1024 * 1024) +
+      " --admission_control_stale_topic_threshold_ms={0}".format(
+          STALE_TOPIC_THRESHOLD_MS),
+      statestored_args=_STATESTORED_ARGS)
+  def test_statestore_outage(self):
+    """Test behaviour with a failed statestore. Queries should continue to be admitted
+    but we should generate diagnostics about the stale topic."""
+    self.cluster.statestored.kill()
+    impalad = self.cluster.impalads[0]
+    # Sleep until the update should be definitely stale.
+    sleep(STALE_TOPIC_THRESHOLD_MS / 1000. * 1.5)
+    ac_json = impalad.service.get_debug_webpage_json('/admission')
+    ms_since_update = ac_json["statestore_admission_control_time_since_last_update_ms"]
+    assert ms_since_update > STALE_TOPIC_THRESHOLD_MS
+    assert ("Warning: admission control information from statestore is stale:" in
+        ac_json["statestore_update_staleness_detail"])
+
+    # Submit a batch of queries. One should get to run, one will be rejected because
+    # of the full queue, and the others will run after being queued.
+    STMT = "select sleep(100)"
+    TIMEOUT_S = 60
+    NUM_QUERIES = 5
+    profiles = self._execute_and_collect_profiles([STMT for i in xrange(NUM_QUERIES)],
+        TIMEOUT_S, allow_query_failure=True)
+    ADMITTED_STALENESS_WARNING = \
+        "Warning: admission control information from statestore is stale"
+    ADMITTED_STALENESS_PROFILE_ENTRY = \
+        "Admission control state staleness: " + ADMITTED_STALENESS_WARNING
+
+    num_queued = 0
+    num_admitted_immediately = 0
+    num_rejected = 0
+    for profile in profiles:
+      if "Admission result: Admitted immediately" in profile:
+        assert ADMITTED_STALENESS_PROFILE_ENTRY in profile, profile
+        num_admitted_immediately += 1
+      elif "Admission result: Rejected" in profile:
+        num_rejected += 1
+        # Check that the rejection error returned to the client contains a warning.
+        query_statuses = [line for line in profile.split("\n")
+                          if "Query Status:" in line]
+        assert len(query_statuses) == 1, profile
+        assert ADMITTED_STALENESS_WARNING in query_statuses[0]
+      else:
+        assert "Admission result: Admitted (queued)" in profile, profile
+        assert ADMITTED_STALENESS_PROFILE_ENTRY in profile, profile
+
+        # Check that the queued reason contains a warning.
+        queued_reasons = [line for line in profile.split("\n")
+                         if "Initial admission queue reason:" in line]
+        assert len(queued_reasons) == 1, profile
+        assert ADMITTED_STALENESS_WARNING in queued_reasons[0]
+        num_queued += 1
+    assert num_admitted_immediately == 1
+    assert num_queued == 3
+    assert num_rejected == NUM_QUERIES - num_admitted_immediately - num_queued
+
 
 class TestAdmissionControllerStress(TestAdmissionControllerBase):
   """Submits a number of queries (parameterized) with some delay between submissions
diff --git a/www/admission_controller.tmpl b/www/admission_controller.tmpl
index edd2fff..cb60cde 100644
--- a/www/admission_controller.tmpl
+++ b/www/admission_controller.tmpl
@@ -85,9 +85,17 @@ Example of json received from the impala server
                 }
             ]
         }
-    ]
+    ],
+    "statestore_admission_control_time_since_last_update_ms": 745,
+    "statestore_update_staleness_detail": "Warning: admission control information from statestore is stale: 745ms since last update was received.",
+    "get_all_pools": true
 -->
 {{> www/common-header.tmpl }}
+{{?statestore_update_staleness_detail}}
+<div class="alert alert-danger" role="alert">
+<strong>{{statestore_update_staleness_detail}}</strong>
+</div>
+{{/statestore_update_staleness_detail}}
 <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/2.7.3/Chart.min.js" type="text/javascript"></script>
 <script type="text/javascript">
 window.onload = function() {
@@ -204,6 +212,12 @@ function reset_method(pool_name) {
   <a href='/backends'>backends</a> debug page for memory admitted and reserved per
   backend.
 </p>
+<p class="lead">
+<strong>
+Time since last statestore update containing admission control topic state (ms):
+</strong>
+{{statestore_admission_control_time_since_last_update_ms}}
+</p>
 {{#resource_pools}}
 <div class="container-fluid">
   <h3><a href='/admission?pool_name={{pool_name}}'>{{pool_name}}</a></h3>