You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2016/05/30 23:21:16 UTC

[17/33] incubator-quickstep git commit: Initial support for collecting table statistics: number of distinct values (#227)

Initial support for collecting table statistics: number of distinct values (#227)

Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/df4a05d7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/df4a05d7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/df4a05d7

Branch: refs/heads/travis-grpc
Commit: df4a05d7ea95cc65c93015e85eaf6edb824816d4
Parents: a25da39
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu May 19 10:58:43 2016 -0500
Committer: Zuyu Zhang <zz...@pivotal.io>
Committed: Mon May 30 15:47:51 2016 -0700

----------------------------------------------------------------------
 catalog/CMakeLists.txt                  |  11 +++
 catalog/Catalog.proto                   |  13 ++-
 catalog/CatalogRelation.cpp             |  11 +++
 catalog/CatalogRelation.hpp             |  24 ++++-
 catalog/CatalogRelationStatistics.cpp   |  49 ++++++++++
 catalog/CatalogRelationStatistics.hpp   | 122 +++++++++++++++++++++++
 cli/CMakeLists.txt                      |  21 +++-
 cli/CommandExecutor.cpp                 | 138 ++++++++++++++++++++++++++-
 cli/CommandExecutor.hpp                 |  17 ++--
 cli/QuickstepCli.cpp                    |   2 +
 cli/tests/CommandExecutorTestRunner.cpp |   2 +
 query_optimizer/ExecutionGenerator.cpp  |   6 +-
 query_optimizer/QueryProcessor.hpp      |  10 ++
 13 files changed, 410 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/catalog/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/catalog/CMakeLists.txt b/catalog/CMakeLists.txt
index 94da838..64b4f16 100644
--- a/catalog/CMakeLists.txt
+++ b/catalog/CMakeLists.txt
@@ -1,5 +1,7 @@
 #   Copyright 2011-2015 Quickstep Technologies LLC.
 #   Copyright 2015-2016 Pivotal Software, Inc.
+#   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+#     University of Wisconsin—Madison.
 #
 #   Licensed under the Apache License, Version 2.0 (the "License");
 #   you may not use this file except in compliance with the License.
@@ -36,6 +38,9 @@ add_library(quickstep_catalog_CatalogRelation CatalogRelation.cpp CatalogRelatio
 add_library(quickstep_catalog_CatalogRelationSchema
             CatalogRelationSchema.cpp
             CatalogRelationSchema.hpp)
+add_library(quickstep_catalog_CatalogRelationStatistics
+            CatalogRelationStatistics.cpp
+            CatalogRelationStatistics.hpp)
 add_library(quickstep_catalog_CatalogTypedefs ../empty_src.cpp CatalogTypedefs.hpp)
 add_library(quickstep_catalog_IndexScheme IndexScheme.cpp IndexScheme.hpp)
 if(QUICKSTEP_HAVE_LIBNUMA)
@@ -98,6 +103,7 @@ target_link_libraries(quickstep_catalog_CatalogRelation
                       glog
                       quickstep_catalog_CatalogAttribute
                       quickstep_catalog_CatalogRelationSchema
+                      quickstep_catalog_CatalogRelationStatistics
                       quickstep_catalog_CatalogTypedefs
                       quickstep_catalog_Catalog_proto
                       quickstep_catalog_IndexScheme
@@ -111,6 +117,10 @@ target_link_libraries(quickstep_catalog_CatalogRelation
                       quickstep_threading_SpinSharedMutex
                       quickstep_utility_Macros
                       quickstep_utility_PtrVector)
+target_link_libraries(quickstep_catalog_CatalogRelationStatistics
+                      quickstep_catalog_CatalogTypedefs
+                      quickstep_catalog_Catalog_proto
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_catalog_IndexScheme
                       glog
                       quickstep_catalog_Catalog_proto
@@ -173,6 +183,7 @@ target_link_libraries(quickstep_catalog
                       quickstep_catalog_CatalogErrors
                       quickstep_catalog_CatalogRelation
                       quickstep_catalog_CatalogRelationSchema
+                      quickstep_catalog_CatalogRelationStatistics
                       quickstep_catalog_CatalogTypedefs
                       quickstep_catalog_IndexScheme
                       quickstep_catalog_PartitionScheme

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/catalog/Catalog.proto
----------------------------------------------------------------------
diff --git a/catalog/Catalog.proto b/catalog/Catalog.proto
index 81e28cf..ce4bc2e 100644
--- a/catalog/Catalog.proto
+++ b/catalog/Catalog.proto
@@ -1,7 +1,7 @@
 //   Copyright 2011-2015 Quickstep Technologies LLC.
 //   Copyright 2015-2016 Pivotal Software, Inc.
 //   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
-//    University of Wisconsin—Madison.
+//     University of Wisconsin—Madison.
 //
 //   Licensed under the Apache License, Version 2.0 (the "License");
 //   you may not use this file except in compliance with the License.
@@ -80,6 +80,16 @@ message IndexScheme {
   repeated IndexEntry index_entries = 1;
 }
 
+message CatalogRelationStatistics {
+  optional fixed64 num_tuples = 1;
+  
+  message NumDistinctValuesEntry {
+    required int32 attr_id = 1;
+    required fixed64 num_distinct_values = 2;
+  }
+  repeated NumDistinctValuesEntry num_distinct_values_map = 2;
+}
+
 message CatalogRelationSchema {
   required int32 relation_id = 1;
   required string name = 2;
@@ -99,6 +109,7 @@ message CatalogRelation {
     optional IndexScheme index_scheme = 18;
     optional PartitionScheme partition_scheme = 19;
     optional NUMAPlacementScheme placement_scheme = 20;
+    optional CatalogRelationStatistics statistics = 21;
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/catalog/CatalogRelation.cpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelation.cpp b/catalog/CatalogRelation.cpp
index 36f82d9..01aebb5 100644
--- a/catalog/CatalogRelation.cpp
+++ b/catalog/CatalogRelation.cpp
@@ -132,6 +132,14 @@ CatalogRelation::CatalogRelation(const serialization::CatalogRelationSchema &pro
   }
 
   default_layout_.reset(new StorageBlockLayout(*this, proto_default_layout));
+
+  if (proto.HasExtension(serialization::CatalogRelation::statistics)) {
+    statistics_.reset(
+        new CatalogRelationStatistics(
+            proto.GetExtension(serialization::CatalogRelation::statistics)));
+  } else {
+    statistics_.reset(new CatalogRelationStatistics());
+  }
 }
 
 serialization::CatalogRelationSchema CatalogRelation::getProto() const {
@@ -177,6 +185,9 @@ serialization::CatalogRelationSchema CatalogRelation::getProto() const {
 #endif
   }
 
+  proto.MutableExtension(serialization::CatalogRelation::statistics)
+      ->MergeFrom(statistics_->getProto());
+
   return proto;
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/catalog/CatalogRelation.hpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelation.hpp b/catalog/CatalogRelation.hpp
index 312f3b4..e0d5350 100644
--- a/catalog/CatalogRelation.hpp
+++ b/catalog/CatalogRelation.hpp
@@ -29,6 +29,7 @@
 #include "catalog/Catalog.pb.h"
 #include "catalog/CatalogConfig.h"
 #include "catalog/CatalogRelationSchema.hpp"
+#include "catalog/CatalogRelationStatistics.hpp"
 #include "catalog/CatalogTypedefs.hpp"
 #include "catalog/IndexScheme.hpp"
 
@@ -79,7 +80,8 @@ class CatalogRelation : public CatalogRelationSchema {
                   const relation_id id = -1,
                   bool temporary = false)
       : CatalogRelationSchema(parent, name, id, temporary),
-        default_layout_(nullptr) {
+        default_layout_(nullptr),
+        statistics_(new CatalogRelationStatistics()) {
   }
 
   /**
@@ -377,6 +379,24 @@ class CatalogRelation : public CatalogRelationSchema {
            * getDefaultStorageBlockLayout().estimateTuplesPerBlock();
   }
 
+  /**
+   * @brief Get an immutable reference to the statistics of this catalog relation.
+   *
+   * @return A reference to the statistics of this catalog relation.
+   */
+  const CatalogRelationStatistics& getStatistics() const {
+    return *statistics_;
+  }
+
+  /**
+   * @brief Get a mutable pointer to the statistics of this catalog relation.
+   *
+   * @return A pointer to the statistics of this catalog relation.
+   */
+  CatalogRelationStatistics* getStatisticsMutable() {
+    return statistics_.get();
+  }
+
  private:
   // A list of blocks belonged to the relation.
   std::vector<block_id> blocks_;
@@ -397,6 +417,8 @@ class CatalogRelation : public CatalogRelationSchema {
   // Mutex for locking the index scheme.
   alignas(kCacheLineBytes) mutable SpinSharedMutex<false> index_scheme_mutex_;
 
+  std::unique_ptr<CatalogRelationStatistics> statistics_;
+
 #ifdef QUICKSTEP_HAVE_LIBNUMA
   // NUMA placement scheme object which has the mapping between the partitions
   // of the relation and the NUMA nodes/sockets. It also maintains a mapping

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/catalog/CatalogRelationStatistics.cpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelationStatistics.cpp b/catalog/CatalogRelationStatistics.cpp
new file mode 100644
index 0000000..2bd92b4
--- /dev/null
+++ b/catalog/CatalogRelationStatistics.cpp
@@ -0,0 +1,49 @@
+/**
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin—Madison.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ **/
+
+#include "catalog/CatalogRelationStatistics.hpp"
+
+#include "catalog/Catalog.pb.h"
+
+namespace quickstep {
+
+CatalogRelationStatistics::CatalogRelationStatistics(
+    const serialization::CatalogRelationStatistics &proto)
+    // getProto() omits num_tuples when the count is 0, so an absent field
+    // must map back to 0 rather than leave the member uninitialized.
+    : num_tuples_(proto.has_num_tuples() ? proto.num_tuples() : 0) {
+  for (int i = 0; i < proto.num_distinct_values_map_size(); ++i) {
+    const auto &entry = proto.num_distinct_values_map(i);
+    num_distinct_values_map_.emplace(entry.attr_id(),
+                                     entry.num_distinct_values());
+  }
+}
+
+serialization::CatalogRelationStatistics CatalogRelationStatistics::getProto() const {
+  serialization::CatalogRelationStatistics proto;
+  if (num_tuples_ != 0) {
+    proto.set_num_tuples(num_tuples_);
+  }
+  for (const auto &pair : num_distinct_values_map_) {
+    auto entry = proto.add_num_distinct_values_map();
+    entry->set_attr_id(pair.first);
+    entry->set_num_distinct_values(pair.second);
+  }
+  return proto;
+}
+
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/catalog/CatalogRelationStatistics.hpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelationStatistics.hpp b/catalog/CatalogRelationStatistics.hpp
new file mode 100644
index 0000000..572d141
--- /dev/null
+++ b/catalog/CatalogRelationStatistics.hpp
@@ -0,0 +1,122 @@
+/**
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin—Madison.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_CATALOG_CATALOG_RELATION_STATISTICS_HPP_
+#define QUICKSTEP_CATALOG_CATALOG_RELATION_STATISTICS_HPP_
+
+#include <cstddef>
+#include <unordered_map>
+#include <utility>
+
+#include "catalog/Catalog.pb.h"
+#include "catalog/CatalogTypedefs.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+
+/** \addtogroup Catalog
+ *  @{
+ */
+
+/**
+ * @brief Statistics of a catalog relation. E.g. total number of tuples,
+ *        number of distinct values for each column.
+ **/
+class CatalogRelationStatistics {
+ public:
+  /**
+   * @brief Constructor.
+   **/
+  CatalogRelationStatistics()
+      : num_tuples_(0) {}
+
+  /**
+   * @brief Reconstruct a CatalogRelationStatistics object from its serialized
+   *        Protocol Buffer form.
+   *
+   * @param proto The Protocol Buffer serialization of a CatalogRelationStatistics
+   *        object, previously produced by getProto().
+   **/
+  explicit CatalogRelationStatistics(const serialization::CatalogRelationStatistics &proto);
+
+  /**
+   * @brief Serialize the CatalogRelationStatistics object as Protocol Buffer.
+   *
+   * @return The Protocol Buffer representation of the CatalogRelationStatistics
+   *         object.
+   **/
+  serialization::CatalogRelationStatistics getProto() const;
+
+  /**
+   * @brief Set the number of tuples statistic.
+   *
+   * @param num_tuples The number of tuples statistic.
+   */
+  void setNumTuples(std::size_t num_tuples) {
+    num_tuples_ = num_tuples;
+  }
+
+  /**
+   * @brief Get the number of tuples statistic.
+   *
+   * @return The number of tuples. Returns 0 if the statistic is not set.
+   */
+  std::size_t getNumTuples() const {
+    return num_tuples_;
+  }
+
+  /**
+   * @brief Set the number of distinct values statistic for a column (catalog attribute).
+   *
+   * @param attr_id The id of the column.
+   * @param num_distinct_values The number of distinct values statistic.
+   */
+  void setNumDistinctValues(attribute_id attr_id, std::size_t num_distinct_values) {
+    num_distinct_values_map_[attr_id] = num_distinct_values;
+  }
+
+  /**
+   * @brief Get the number of distinct values statistic for a column (catalog attribute).
+   *
+   * @param attr_id The id of the column.
+   * @return The number of distinct values statistic for the column. Returns 0
+   *         if the statistic is not set.
+   */
+  std::size_t getNumDistinctValues(attribute_id attr_id) const {
+    const auto it = num_distinct_values_map_.find(attr_id);
+    if (it == num_distinct_values_map_.end()) {
+      return static_cast<std::size_t>(0);
+    } else {
+      return it->second;
+    }
+  }
+
+ private:
+  // Total number of tuples in the relation.
+  std::size_t num_tuples_;
+
+  // Number of distinct values for each column.
+  std::unordered_map<attribute_id, std::size_t> num_distinct_values_map_;
+
+  DISALLOW_COPY_AND_ASSIGN(CatalogRelationStatistics);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_CATALOG_CATALOG_RELATION_STATISTICS_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/cli/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cli/CMakeLists.txt b/cli/CMakeLists.txt
index 761b6d8..8fee7a4 100644
--- a/cli/CMakeLists.txt
+++ b/cli/CMakeLists.txt
@@ -1,5 +1,7 @@
 #   Copyright 2011-2015 Quickstep Technologies LLC.
 #   Copyright 2015 Pivotal Software, Inc.
+#   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+#     University of Wisconsin—Madison.
 #
 #   Licensed under the Apache License, Version 2.0 (the "License");
 #   you may not use this file except in compliance with the License.
@@ -73,11 +75,24 @@ target_link_libraries(quickstep_cli_CommandExecutor
                       quickstep_catalog_CatalogDatabase
                       quickstep_catalog_CatalogRelation
                       quickstep_catalog_CatalogRelationSchema
-                      quickstep_cli_PrintToScreen 
+                      quickstep_cli_DropRelation
+                      quickstep_cli_PrintToScreen
                       quickstep_parser_ParseStatement
+                      quickstep_parser_SqlParserWrapper
+                      quickstep_queryexecution_Foreman
+                      quickstep_queryoptimizer_QueryHandle
+                      quickstep_queryoptimizer_QueryPlan
+                      quickstep_queryoptimizer_QueryProcessor
+                      quickstep_storage_StorageBlock
                       quickstep_storage_StorageBlockInfo
-                      quickstep_utility_Macros
-                      quickstep_utility_PtrVector                        
+                      quickstep_storage_StorageManager
+                      quickstep_storage_TupleIdSequence
+                      quickstep_storage_TupleStorageSubBlock
+                      quickstep_parser_ParseString
+                      quickstep_types_Type
+                      quickstep_types_TypeID
+                      quickstep_types_TypedValue
+                      quickstep_utility_PtrVector
                       quickstep_utility_SqlError)
 
 target_link_libraries(quickstep_cli_DefaultsConfigurator

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/cli/CommandExecutor.cpp
----------------------------------------------------------------------
diff --git a/cli/CommandExecutor.cpp b/cli/CommandExecutor.cpp
index 026922a..3cb3f86 100644
--- a/cli/CommandExecutor.cpp
+++ b/cli/CommandExecutor.cpp
@@ -19,6 +19,7 @@
 
 #include <algorithm>
 #include <cstddef>
+#include <cstdint>
 #include <cstdio>
 #include <memory>
 #include <string>
@@ -28,14 +29,26 @@
 #include "catalog/CatalogDatabase.hpp"
 #include "catalog/CatalogRelation.hpp"
 #include "catalog/CatalogRelationSchema.hpp"
+#include "cli/DropRelation.hpp"
 #include "cli/PrintToScreen.hpp"
 #include "parser/ParseStatement.hpp"
+#include "parser/ParseString.hpp"
+#include "parser/SqlParserWrapper.hpp"
+#include "query_execution/Foreman.hpp"
+#include "query_optimizer/QueryHandle.hpp"
+#include "query_optimizer/QueryPlan.hpp"
+#include "query_optimizer/QueryProcessor.hpp"
+#include "storage/StorageBlock.hpp"
 #include "storage/StorageBlockInfo.hpp"
+#include "storage/StorageManager.hpp"
+#include "storage/TupleIdSequence.hpp"
+#include "storage/TupleStorageSubBlock.hpp"
+#include "types/Type.hpp"
+#include "types/TypeID.hpp"
+#include "types/TypedValue.hpp"
 #include "utility/PtrVector.hpp"
-#include "utility/Macros.hpp"
 #include "utility/SqlError.hpp"
 
-#include "gflags/gflags.h"
 #include "glog/logging.h"
 
 using std::fprintf;
@@ -195,11 +208,130 @@ void executeDescribeTable(
   }
 }
 
+/**
+ * @brief A helper function that executes a SQL query to obtain a scalar result.
+ */
+inline TypedValue executeQueryForSingleResult(const std::string &query_string,
+                                               StorageManager *storage_manager,
+                                               QueryProcessor *query_processor,
+                                               SqlParserWrapper *parser_wrapper,
+                                               Foreman *foreman) {
+  parser_wrapper->feedNextBuffer(new std::string(query_string));
+
+  ParseResult result = parser_wrapper->getNextStatement();
+  DCHECK(result.condition == ParseResult::kSuccess);
+
+  // Generate the query plan.
+  std::unique_ptr<QueryHandle> query_handle(
+      query_processor->generateQueryHandle(*result.parsed_statement));
+  DCHECK(query_handle->getQueryPlanMutable() != nullptr);
+
+  // Use foreman to execute the query plan.
+  foreman->setQueryPlan(query_handle->getQueryPlanMutable()->getQueryPlanDAGMutable());
+  foreman->reconstructQueryContextFromProto(query_handle->getQueryContextProto());
+
+  foreman->start();
+  foreman->join();
+
+  // Retrieve the scalar result from the result relation.
+  const CatalogRelation *query_result_relation = query_handle->getQueryResultRelation();
+  DCHECK(query_result_relation != nullptr);
+
+  TypedValue value;
+  {
+    std::vector<block_id> blocks = query_result_relation->getBlocksSnapshot();
+    DCHECK_EQ(1u, blocks.size());
+    BlockReference block = storage_manager->getBlock(blocks[0], *query_result_relation);
+    const TupleStorageSubBlock &tuple_store = block->getTupleStorageSubBlock();
+    DCHECK_EQ(1, tuple_store.numTuples());
+    DCHECK_EQ(1u, tuple_store.getRelation().size());
+
+    if (tuple_store.isPacked()) {
+      value = tuple_store.getAttributeValueTyped(0, 0);
+    } else {
+      std::unique_ptr<TupleIdSequence> existence_map(tuple_store.getExistenceMap());
+      value = tuple_store.getAttributeValueTyped(*existence_map->begin(), 0);
+    }
+    value.ensureNotReference();
+  }
+
+  // Drop the result relation.
+  DropRelation::Drop(*query_result_relation,
+                     query_processor->getDefaultDatabase(),
+                     query_processor->getStorageManager());
+
+  return value;
+}
+
+void executeAnalyze(QueryProcessor *query_processor,
+                    Foreman *foreman,
+                    FILE *out) {
+  const CatalogDatabase &database = *query_processor->getDefaultDatabase();
+  StorageManager *storage_manager = query_processor->getStorageManager();
+
+  std::unique_ptr<SqlParserWrapper> parser_wrapper(new SqlParserWrapper());
+  std::vector<std::reference_wrapper<const CatalogRelation>> relations(
+      database.begin(), database.end());
+
+  // Analyze each relation in the database.
+  for (const CatalogRelation &relation : relations) {
+    fprintf(out, "Analyzing %s ... ", relation.getName().c_str());
+    fflush(out);
+
+    CatalogRelation *mutable_relation =
+        query_processor->getDefaultDatabase()->getRelationByIdMutable(relation.getID());
+
+    // Get the number of distinct values for each column.
+    for (const CatalogAttribute &attribute : relation) {
+      std::string query_string = "SELECT COUNT(DISTINCT ";
+      query_string.append(attribute.getName());
+      query_string.append(") FROM ");
+      query_string.append(relation.getName());
+      query_string.append(";");
+
+      TypedValue num_distinct_values =
+          executeQueryForSingleResult(query_string,
+                                      storage_manager,
+                                      query_processor,
+                                      parser_wrapper.get(),
+                                      foreman);
+
+      DCHECK(num_distinct_values.getTypeID() == TypeID::kLong);
+      mutable_relation->getStatisticsMutable()->setNumDistinctValues(
+          attribute.getID(),
+          num_distinct_values.getLiteral<std::int64_t>());
+    }
+
+    // Get the number of tuples for the relation.
+    std::string query_string = "SELECT COUNT(*) FROM ";
+    query_string.append(relation.getName());
+    query_string.append(";");
+
+    TypedValue num_tuples =
+        executeQueryForSingleResult(query_string,
+                                    storage_manager,
+                                    query_processor,
+                                    parser_wrapper.get(),
+                                    foreman);
+
+    DCHECK(num_tuples.getTypeID() == TypeID::kLong);
+    mutable_relation->getStatisticsMutable()->setNumTuples(
+        num_tuples.getLiteral<std::int64_t>());
+
+    fprintf(out, "done\n");
+    fflush(out);
+  }
+  query_processor->markCatalogAltered();
+  query_processor->saveCatalog();
+}
+
 }  // namespace
 
 void executeCommand(const ParseStatement &statement,
                     const CatalogDatabase &catalog_database,
                     StorageManager *storage_manager,
+                    QueryProcessor *query_processor,
+                    Foreman *foreman,
                     FILE *out) {
   const ParseCommand &command = static_cast<const ParseCommand &>(statement);
   const PtrVector<ParseString> *arguments = command.arguments();
@@ -212,6 +344,8 @@ void executeCommand(const ParseStatement &statement,
     } else {
       executeDescribeTable(arguments, catalog_database, out);
     }
+  } else if (command_str == C::kAnalyzeCommand) {
+    executeAnalyze(query_processor, foreman, out);
   } else {
     THROW_SQL_ERROR_AT(command.command()) << "Invalid Command";
   }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/cli/CommandExecutor.hpp
----------------------------------------------------------------------
diff --git a/cli/CommandExecutor.hpp b/cli/CommandExecutor.hpp
index f367ca1..c819981 100644
--- a/cli/CommandExecutor.hpp
+++ b/cli/CommandExecutor.hpp
@@ -19,13 +19,8 @@
 #define QUICKSTEP_CLI_COMMAND_COMMAND_EXECUTOR_HPP_
 
 #include <cstdio>
-#include <limits>
 #include <string>
 
-#include "parser/ParseStatement.hpp"
-#include "storage/StorageBlockInfo.hpp"
-#include "utility/Macros.hpp"
-
 using std::fprintf;
 using std::fputc;
 using std::string;
@@ -33,11 +28,13 @@ using std::string;
 namespace quickstep {
 
 class CatalogDatabase;
-class CatalogAttribute;
-class CatalogRelation;
+class Foreman;
+class ParseStatement;
+class QueryProcessor;
 class StorageManager;
 
 namespace cli {
+
 /** \addtogroup CLI
  *  @{
  */
@@ -49,17 +46,23 @@ constexpr int kInitMaxColumnWidth = 6;
 
 constexpr char kDescribeDatabaseCommand[] = "\\dt";
 constexpr char kDescribeTableCommand[] = "\\d";
+constexpr char kAnalyzeCommand[] = "\\analyze";
 
 /**
   * @brief Executes the command by calling the command handler.
   *
   * @param statement The parsed statement from the cli.
   * @param catalog_database The catalog information about the current database.
+  * @param storage_manager The current StorageManager.
+  * @param query_processor The query processor to generate plans for SQL queries.
+  * @param foreman The foreman to execute query plans.
   * @param out The stream where the output of the command has to be redirected to.
 */
 void executeCommand(const ParseStatement &statement,
                     const CatalogDatabase &catalog_database,
                     StorageManager *storage_manager,
+                    QueryProcessor *query_processor,
+                    Foreman *foreman,
                     FILE *out);
 
 /** @} */

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/cli/QuickstepCli.cpp
----------------------------------------------------------------------
diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp
index b7b28ba..558d6eb 100644
--- a/cli/QuickstepCli.cpp
+++ b/cli/QuickstepCli.cpp
@@ -367,6 +367,8 @@ int main(int argc, char* argv[]) {
                 *result.parsed_statement,
                 *(query_processor->getDefaultDatabase()),
                 query_processor->getStorageManager(),
+                query_processor.get(),
+                &foreman,
                 stdout);
           } catch (const quickstep::SqlError &sql_error) {
             fprintf(stderr, "%s",

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/cli/tests/CommandExecutorTestRunner.cpp
----------------------------------------------------------------------
diff --git a/cli/tests/CommandExecutorTestRunner.cpp b/cli/tests/CommandExecutorTestRunner.cpp
index 73d2092..9cd493e 100644
--- a/cli/tests/CommandExecutorTestRunner.cpp
+++ b/cli/tests/CommandExecutorTestRunner.cpp
@@ -88,6 +88,8 @@ void CommandExecutorTestRunner::runTestCase(
               *result.parsed_statement,
               *(test_database_loader_.catalog_database()),
               test_database_loader_.storage_manager(),
+              nullptr,
+              nullptr,
               output_stream.file());
         } else  {
           QueryHandle query_handle(optimizer_context.query_id());

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 7209cfa..c590b6e 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -1389,11 +1389,13 @@ void ExecutionGenerator::convertAggregate(
 
     // Add distinctify hash table impl type if it is a DISTINCT aggregation.
     if (unnamed_aggregate_expression->is_distinct()) {
-      if (group_by_types.empty()) {
+      const std::vector<E::ScalarPtr> &arguments = unnamed_aggregate_expression->getArguments();
+      DCHECK_GE(arguments.size(), 1u);
+      if (group_by_types.empty() && arguments.size() == 1) {
         aggr_state_proto->add_distinctify_hash_table_impl_types(
             SimplifyHashTableImplTypeProto(
                 HashTableImplTypeProtoFromString(FLAGS_aggregate_hashtable_type),
-                {&unnamed_aggregate_expression->getValueType()}));
+                {&arguments[0]->getValueType()}));
       } else {
         aggr_state_proto->add_distinctify_hash_table_impl_types(
             HashTableImplTypeProtoFromString(FLAGS_aggregate_hashtable_type));

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/df4a05d7/query_optimizer/QueryProcessor.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/QueryProcessor.hpp b/query_optimizer/QueryProcessor.hpp
index 4514f45..32739dc 100644
--- a/query_optimizer/QueryProcessor.hpp
+++ b/query_optimizer/QueryProcessor.hpp
@@ -1,6 +1,8 @@
 /**
  *   Copyright 2011-2015 Quickstep Technologies LLC.
  *   Copyright 2015-2016 Pivotal Software, Inc.
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin—Madison.
  *
  *   Licensed under the Apache License, Version 2.0 (the "License");
  *   you may not use this file except in compliance with the License.
@@ -159,6 +161,14 @@ class QueryProcessor {
   void saveCatalog();
 
   /**
+   * @brief Set \p catalog_altered_ to true to indicate that the catalog
+   *        has been altered.
+   */
+  void markCatalogAltered() {
+    catalog_altered_ = true;
+  }
+
+  /**
    * @brief Get the default database in the Catalog held by this
    *        QueryProcessor.
    **/