You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2016/05/27 03:23:03 UTC
[12/50] [abbrv] incubator-quickstep git commit: Fixed the incorrect
distinctify hash table problem related to DISTINCT aggregation
Fixed the incorrect distinctify hash table problem related to DISTINCT aggregation
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/70fcdb5d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/70fcdb5d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/70fcdb5d
Branch: refs/heads/change-aggregation-hashtable
Commit: 70fcdb5d6c0741db6382c42ee546f418e2a2cee6
Parents: 41cea5d
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Wed May 4 22:20:41 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Wed May 4 22:25:11 2016 -0500
----------------------------------------------------------------------
query_optimizer/ExecutionGenerator.cpp | 67 ++++++++++++--------
.../tests/execution_generator/Select.test | 23 +++----
storage/AggregationOperationState.cpp | 28 +++++++-
storage/AggregationOperationState.hpp | 5 ++
storage/AggregationOperationState.proto | 5 ++
5 files changed, 89 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70fcdb5d/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 3698701..077d35d 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -1285,6 +1285,35 @@ void ExecutionGenerator::convertAggregate(
findRelationInfoOutputByPhysical(physical_plan->input());
aggr_state_proto->set_relation_id(input_relation_info->relation->getID());
+ std::vector<const Type*> group_by_types;
+ for (const E::NamedExpressionPtr &grouping_expression : physical_plan->grouping_expressions()) {
+ unique_ptr<const Scalar> execution_group_by_expression;
+ E::AliasPtr alias;
+ if (E::SomeAlias::MatchesWithConditionalCast(grouping_expression, &alias)) {
+ E::ScalarPtr scalar;
+ // NOTE(zuyu): For aggregate expressions, all child expressions of an
+ // Alias should be a Scalar.
+ CHECK(E::SomeScalar::MatchesWithConditionalCast(alias->expression(), &scalar))
+ << alias->toString();
+ execution_group_by_expression.reset(scalar->concretize(attribute_substitution_map_));
+ } else {
+ execution_group_by_expression.reset(
+ grouping_expression->concretize(attribute_substitution_map_));
+ }
+ aggr_state_proto->add_group_by_expressions()->CopyFrom(execution_group_by_expression->getProto());
+ group_by_types.push_back(&execution_group_by_expression->getType());
+ }
+
+ if (!group_by_types.empty()) {
+ // SimplifyHashTableImplTypeProto() switches the hash table implementation
+ // from SeparateChaining to SimpleScalarSeparateChaining when there is a
+ // single scalar key type with a reversible hash function.
+ aggr_state_proto->set_hash_table_impl_type(
+ SimplifyHashTableImplTypeProto(
+ HashTableImplTypeProtoFromString(FLAGS_aggregate_hashtable_type),
+ group_by_types));
+ }
+
for (const E::AliasPtr &named_aggregate_expression : physical_plan->aggregate_expressions()) {
const E::AggregateFunctionPtr unnamed_aggregate_expression =
std::static_pointer_cast<const E::AggregateFunction>(named_aggregate_expression->expression());
@@ -1304,25 +1333,19 @@ void ExecutionGenerator::convertAggregate(
// Set whether it is a DISTINCT aggregation.
aggr_proto->set_is_distinct(unnamed_aggregate_expression->is_distinct());
- }
- std::vector<const Type*> group_by_types;
- for (const E::NamedExpressionPtr &grouping_expression : physical_plan->grouping_expressions()) {
- unique_ptr<const Scalar> execution_group_by_expression;
- E::AliasPtr alias;
- if (E::SomeAlias::MatchesWithConditionalCast(grouping_expression, &alias)) {
- E::ScalarPtr scalar;
- // NOTE(zuyu): For aggregate expressions, all child expressions of an
- // Alias should be a Scalar.
- CHECK(E::SomeScalar::MatchesWithConditionalCast(alias->expression(), &scalar))
- << alias->toString();
- execution_group_by_expression.reset(scalar->concretize(attribute_substitution_map_));
- } else {
- execution_group_by_expression.reset(
- grouping_expression->concretize(attribute_substitution_map_));
+ // Add distinctify hash table impl type if it is a DISTINCT aggregation.
+ if (unnamed_aggregate_expression->is_distinct()) {
+ if (group_by_types.empty()) {
+ aggr_state_proto->add_distinctify_hash_table_impl_types(
+ SimplifyHashTableImplTypeProto(
+ HashTableImplTypeProtoFromString(FLAGS_aggregate_hashtable_type),
+ {&unnamed_aggregate_expression->getValueType()}));
+ } else {
+ aggr_state_proto->add_distinctify_hash_table_impl_types(
+ HashTableImplTypeProtoFromString(FLAGS_aggregate_hashtable_type));
+ }
}
- aggr_state_proto->add_group_by_expressions()->CopyFrom(execution_group_by_expression->getProto());
- group_by_types.push_back(&execution_group_by_expression->getType());
}
if (physical_plan->filter_predicate() != nullptr) {
@@ -1332,16 +1355,6 @@ void ExecutionGenerator::convertAggregate(
aggr_state_proto->set_estimated_num_entries(cost_model_->estimateCardinality(physical_plan));
- if (!group_by_types.empty()) {
- // SimplifyHashTableImplTypeProto() switches the hash table implementation
- // from SeparateChaining to SimpleScalarSeparateChaining when there is a
- // single scalar key type with a reversible hash function.
- aggr_state_proto->set_hash_table_impl_type(
- SimplifyHashTableImplTypeProto(
- HashTableImplTypeProtoFromString(FLAGS_aggregate_hashtable_type),
- group_by_types));
- }
-
const QueryPlan::DAGNodeIndex aggregation_operator_index =
execution_plan_->addRelationalOperator(
new AggregationOperator(
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70fcdb5d/query_optimizer/tests/execution_generator/Select.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/execution_generator/Select.test b/query_optimizer/tests/execution_generator/Select.test
index a08b012..390b7b6 100644
--- a/query_optimizer/tests/execution_generator/Select.test
+++ b/query_optimizer/tests/execution_generator/Select.test
@@ -535,21 +535,12 @@ WHERE int_col = 2;
SELECT int_col
FROM test
-GROUP BY int_col;
+GROUP BY int_col
+ORDER BY int_col;
--
+-----------+
|int_col |
+-----------+
-| 2|
-| 4|
-| 6|
-| 8|
-| 12|
-| 14|
-| 16|
-| 18|
-| 22|
-| 24|
| -23|
| -21|
| -19|
@@ -562,6 +553,16 @@ GROUP BY int_col;
| -5|
| -3|
| -1|
+| 2|
+| 4|
+| 6|
+| 8|
+| 12|
+| 14|
+| 16|
+| 18|
+| 22|
+| 24|
+-----------+
==
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70fcdb5d/storage/AggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.cpp b/storage/AggregationOperationState.cpp
index a3a669c..d209ceb 100644
--- a/storage/AggregationOperationState.cpp
+++ b/storage/AggregationOperationState.cpp
@@ -1,6 +1,8 @@
/**
* Copyright 2011-2015 Quickstep Technologies LLC.
* Copyright 2015-2016 Pivotal Software, Inc.
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin\u2014Madison.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -64,6 +66,7 @@ AggregationOperationState::AggregationOperationState(
const Predicate *predicate,
const std::size_t estimated_num_entries,
const HashTableImplType hash_table_impl_type,
+ const std::vector<HashTableImplType> &distinctify_hash_table_impl_types,
StorageManager *storage_manager)
: input_relation_(input_relation),
predicate_(predicate),
@@ -101,6 +104,8 @@ AggregationOperationState::AggregationOperationState(
std::vector<std::vector<std::unique_ptr<const Scalar>>>::const_iterator args_it
= arguments_.begin();
std::vector<bool>::const_iterator is_distinct_it = is_distinct_.begin();
+ std::vector<HashTableImplType>::const_iterator distinctify_hash_table_impl_types_it
+ = distinctify_hash_table_impl_types.begin();
for (; agg_func_it != aggregate_functions.end(); ++agg_func_it, ++args_it, ++is_distinct_it) {
// Get the Types of this aggregate's arguments so that we can create an
// AggregationHandle.
@@ -161,10 +166,11 @@ AggregationOperationState::AggregationOperationState(
// query optimization, if it worths.
distinctify_hashtables_.emplace_back(
handles_.back()->createDistinctifyHashTable(
- hash_table_impl_type,
+ *distinctify_hash_table_impl_types_it,
key_types,
estimated_num_entries,
storage_manager));
+ ++distinctify_hash_table_impl_types_it;
} else {
distinctify_hashtables_.emplace_back(nullptr);
}
@@ -182,6 +188,8 @@ AggregationOperationState* AggregationOperationState::ReconstructFromProto(
std::vector<const AggregateFunction*> aggregate_functions;
std::vector<std::vector<std::unique_ptr<const Scalar>>> arguments;
std::vector<bool> is_distinct;
+ std::vector<HashTableImplType> distinctify_hash_table_impl_types;
+ std::size_t distinctify_hash_table_impl_type_index = 0;
for (int agg_idx = 0; agg_idx < proto.aggregates_size(); ++agg_idx) {
const serialization::Aggregate &agg_proto = proto.aggregates(agg_idx);
@@ -197,6 +205,13 @@ AggregationOperationState* AggregationOperationState::ReconstructFromProto(
}
is_distinct.emplace_back(agg_proto.is_distinct());
+
+ if (agg_proto.is_distinct()) {
+ distinctify_hash_table_impl_types.emplace_back(
+ HashTableImplTypeFromProto(
+ proto.distinctify_hash_table_impl_types(distinctify_hash_table_impl_type_index)));
+ ++distinctify_hash_table_impl_type_index;
+ }
}
std::vector<std::unique_ptr<const Scalar>> group_by_expressions;
@@ -223,6 +238,7 @@ AggregationOperationState* AggregationOperationState::ReconstructFromProto(
predicate.release(),
proto.estimated_num_entries(),
HashTableImplTypeFromProto(proto.hash_table_impl_type()),
+ distinctify_hash_table_impl_types,
storage_manager);
}
@@ -234,6 +250,8 @@ bool AggregationOperationState::ProtoIsValid(const serialization::AggregationOpe
return false;
}
+ std::size_t num_distinctify_hash_tables = proto.distinctify_hash_table_impl_types_size();
+ std::size_t distinctify_hash_table_impl_type_index = 0;
for (int i = 0; i < proto.aggregates_size(); ++i) {
if (!AggregateFunctionFactory::ProtoIsValid(proto.aggregates(i).function())) {
return false;
@@ -251,6 +269,14 @@ bool AggregationOperationState::ProtoIsValid(const serialization::AggregationOpe
return false;
}
}
+
+ if (proto.aggregates(i).is_distinct()) {
+ if (distinctify_hash_table_impl_type_index >= num_distinctify_hash_tables ||
+ !serialization::HashTableImplType_IsValid(
+ proto.distinctify_hash_table_impl_types(distinctify_hash_table_impl_type_index))) {
+ return false;
+ }
+ }
}
for (int i = 0; i < proto.group_by_expressions_size(); ++i) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70fcdb5d/storage/AggregationOperationState.hpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.hpp b/storage/AggregationOperationState.hpp
index b883ed1..c3a1278 100644
--- a/storage/AggregationOperationState.hpp
+++ b/storage/AggregationOperationState.hpp
@@ -1,6 +1,8 @@
/**
* Copyright 2011-2015 Quickstep Technologies LLC.
* Copyright 2015-2016 Pivotal Software, Inc.
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin\u2014Madison.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -93,6 +95,8 @@ class AggregationOperationState {
* in the input relation.
* @param hash_table_impl_type The HashTable implementation to use for
* GROUP BY. Ignored if group_by is empty.
+ * @param distinctify_hash_table_impl_type The HashTable implementation to use
+ * for the distinctify phase of each DISTINCT aggregation.
* @param storage_manager The StorageManager to use for allocating hash
* tables. Single aggregation state (when GROUP BY list is not
* specified) is not allocated using memory from storage manager.
@@ -105,6 +109,7 @@ class AggregationOperationState {
const Predicate *predicate,
const std::size_t estimated_num_entries,
const HashTableImplType hash_table_impl_type,
+ const std::vector<HashTableImplType> &distinctify_hash_table_impl_types,
StorageManager *storage_manager);
~AggregationOperationState() {}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70fcdb5d/storage/AggregationOperationState.proto
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.proto b/storage/AggregationOperationState.proto
index 031f782..bf78e3a 100644
--- a/storage/AggregationOperationState.proto
+++ b/storage/AggregationOperationState.proto
@@ -1,5 +1,7 @@
// Copyright 2011-2015 Quickstep Technologies LLC.
// Copyright 2015-2016 Pivotal Software, Inc.
+// Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+// University of Wisconsin\u2014Madison.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -37,4 +39,7 @@ message AggregationOperationState {
// NOTE(chasseur): 'hash_table_impl_type' is marked optional, but it is
// needed if 'group_by_expressions' is non-empty, and ignored otherwise.
optional HashTableImplType hash_table_impl_type = 6;
+
+ // Each DISTINCT aggregation has its distinctify hash table impl type.
+ repeated HashTableImplType distinctify_hash_table_impl_types = 7;
}