You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by wl...@apache.org on 2018/04/08 10:15:31 UTC
incubator-hawq git commit: HAWQ-1604. Add a new GUC,
hawq_hashjoin_bloomfilter, to indicate whether to use a Bloom filter for hash join.
Remove gp_hashjoin_bloomfilter and the Bloom filter in the hash join table;
this legacy code has been verified not to improve hash join performance.
Repository: incubator-hawq
Updated Branches:
refs/heads/master c72e58946 -> b6391f191
HAWQ-1604. Add a new GUC, hawq_hashjoin_bloomfilter, to indicate whether to use a Bloom filter for hash join.
Remove gp_hashjoin_bloomfilter and the Bloom filter in the hash join table; this legacy code has been verified not to improve hash join performance.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/b6391f19
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/b6391f19
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/b6391f19
Branch: refs/heads/master
Commit: b6391f19163e5e332a870c652c2f4327ed861b68
Parents: c72e589
Author: Wen Lin <wl...@pivotal.io>
Authored: Sun Apr 8 18:10:12 2018 +0800
Committer: Wen Lin <wl...@pivotal.io>
Committed: Sun Apr 8 18:10:12 2018 +0800
----------------------------------------------------------------------
src/backend/cdb/cdbvars.c | 2 +-
src/backend/executor/nodeHash.c | 23 +++--------------------
src/backend/utils/misc/guc.c | 8 ++++----
src/include/cdb/cdbvars.h | 2 +-
src/include/executor/hashjoin.h | 2 +-
src/test/unit/mock/mock_info.json | 4 ----
6 files changed, 10 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/backend/cdb/cdbvars.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c
index c2fca21..d8e8552 100644
--- a/src/backend/cdb/cdbvars.c
+++ b/src/backend/cdb/cdbvars.c
@@ -234,7 +234,7 @@ int gp_hashagg_spillbatch_min = 0;
int gp_hashagg_spillbatch_max = 0;
/* hash join to use bloom filter: default to 0, means not used */
-int gp_hashjoin_bloomfilter = 0;
+int hawq_hashjoin_bloomfilter = 0;
/* Analyzing aid */
int gp_motion_slice_noop = 0;
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/backend/executor/nodeHash.c
----------------------------------------------------------------------
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index dd63305..a461598 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -77,8 +77,6 @@ void ExecChooseHashTableSize(double ntuples, int tupwidth,
uint64 operatorMemKB
);
-#define BLOOMVAL(hk) (((uint64)1) << (((hk) >> 13) & 0x3f))
-
/* Amount of metadata memory required per batch */
#define MD_MEM_PER_BATCH (sizeof(HashJoinBatchData *) + sizeof(HashJoinBatchData))
@@ -323,7 +321,6 @@ ExecHashTableCreate(HashState *hashState, HashJoinState *hjstate, List *hashOper
*/
hashtable = (HashJoinTable)palloc0(sizeof(HashJoinTableData));
hashtable->buckets = NULL;
- hashtable->bloom = NULL;
hashtable->curbatch = 0;
hashtable->growEnabled = true;
hashtable->totalTuples = 0;
@@ -455,9 +452,6 @@ ExecHashTableCreate(HashState *hashState, HashJoinState *hjstate, List *hashOper
hashtable->buckets = (HashJoinTuple *)
palloc0(nbuckets * sizeof(HashJoinTuple));
- if(gp_hashjoin_bloomfilter!=0)
- hashtable->bloom = (uint64*) palloc0(nbuckets * sizeof(uint64));
-
MemoryContextSwitchTo(oldcxt);
}
END_MEMORY_ACCOUNT();
@@ -792,7 +786,6 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
{
HashJoinTuple prevtuple;
HashJoinTuple tuple;
- uint64 bloom = 0;
prevtuple = NULL;
tuple = hashtable->buckets[i];
@@ -812,7 +805,6 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
{
/* keep tuple */
prevtuple = tuple;
- bloom |= BLOOMVAL(tuple->hashvalue);
}
else
{
@@ -846,9 +838,6 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
tuple = nexttuple;
}
-
- if(gp_hashjoin_bloomfilter!=0)
- hashtable->bloom[i] = bloom;
}
#ifdef HJDEBUG
@@ -989,9 +978,6 @@ ExecHashTableInsert(HashState *hashState, HashJoinTable hashtable,
hashtable->buckets[bucketno] = hashTuple;
hashtable->totalTuples += 1;
- if(gp_hashjoin_bloomfilter!=0)
- hashtable->bloom[bucketno] |= BLOOMVAL(hashvalue);
-
/* Double the number of batches when too much data in hash table. */
if (batch->innerspace > hashtable->spaceAllowed ||
batch->innertuples > UINT_MAX/2)
@@ -1195,12 +1181,12 @@ ExecScanHashBucket(HashState *hashState, HashJoinState *hjstate,
*/
if (hashTuple == NULL)
{
- /* if bloom filter fails, then no match - don't even bother to scan */
- if (gp_hashjoin_bloomfilter == 0 || 0 != (hashtable->bloom[hjstate->hj_CurBucketNo] & BLOOMVAL(hashvalue)))
- hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
+ hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
}
else
+ {
hashTuple = hashTuple->next;
+ }
while (hashTuple != NULL)
{
@@ -1263,9 +1249,6 @@ ExecHashTableReset(HashState *hashState, HashJoinTable hashtable)
hashtable->buckets = (HashJoinTuple *)
palloc0(nbuckets * sizeof(HashJoinTuple));
- if(gp_hashjoin_bloomfilter != 0)
- hashtable->bloom = (uint64*) palloc0(nbuckets * sizeof(uint64));
-
hashtable->batches[hashtable->curbatch]->innerspace = 0;
hashtable->batches[hashtable->curbatch]->innertuples = 0;
hashtable->totalTuples = 0;
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/backend/utils/misc/guc.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ac29d87..64449da 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -5834,13 +5834,13 @@ static struct config_int ConfigureNamesInt[] =
},
{
- {"gp_hashjoin_bloomfilter", PGC_USERSET, GP_ARRAY_TUNING,
+ {"hawq_hashjoin_bloomfilter", PGC_USERSET, GP_ARRAY_TUNING,
gettext_noop("Use bloomfilter in hash join"),
- gettext_noop("Use bloomfilter may speed up hashtable probing"),
+ gettext_noop("Use bloomfilter may speed up hash join performance"),
GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL | GUC_GPDB_ADDOPT
},
- &gp_hashjoin_bloomfilter,
- 1, 0, 1, NULL, NULL
+ &hawq_hashjoin_bloomfilter,
+ 0, 0, 1, NULL, NULL
},
{
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/include/cdb/cdbvars.h
----------------------------------------------------------------------
diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h
index 9f6c3b1..7ce988a 100644
--- a/src/include/cdb/cdbvars.h
+++ b/src/include/cdb/cdbvars.h
@@ -937,7 +937,7 @@ extern int gp_hashagg_spillbatch_min;
extern int gp_hashagg_spillbatch_max;
/* Hashjoin use bloom filter */
-extern int gp_hashjoin_bloomfilter;
+extern int hawq_hashjoin_bloomfilter;
/* Get statistics for partitioned parent from a child */
extern bool gp_statistics_pullup_from_child_partition;
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/include/executor/hashjoin.h
----------------------------------------------------------------------
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 9776eb0..208aed6 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -167,7 +167,7 @@ typedef struct HashJoinTableData
int nbuckets; /* # buckets in the in-memory hash table */
/* buckets[i] is head of list of tuples in i'th in-memory bucket */
struct HashJoinTupleData **buckets;
- uint64 *bloom; /* bloom[i] is bloomfilter for buckets[i] */
+
/* buckets array is per-batch storage, as are all the tuples */
int nbatch; /* number of batches */
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/test/unit/mock/mock_info.json
----------------------------------------------------------------------
diff --git a/src/test/unit/mock/mock_info.json b/src/test/unit/mock/mock_info.json
index 9198cf1..315c351 100644
--- a/src/test/unit/mock/mock_info.json
+++ b/src/test/unit/mock/mock_info.json
@@ -13929,10 +13929,6 @@
"filename": "src/backend/cdb/cdbvars.c",
"header filename": "src/include/cdb/cdbvars.h"
},
- "gp_hashjoin_bloomfilter": {
- "filename": "src/backend/cdb/cdbvars.c",
- "header filename": "src/include/cdb/cdbvars.h"
- },
"gp_hashjoin_tuples_per_bucket": {
"filename": "src/backend/cdb/cdbvars.c",
"header filename": "src/include/cdb/cdbvars.h"