You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by wl...@apache.org on 2018/04/08 10:15:31 UTC
incubator-hawq git commit: HAWQ-1604. Add A New GUC hawq_hashjoin_bloomfilter to indicate if use Bloom filter for hash join. Remove gp_hashjoin_bloomfilter and bloom filter in hash join table, this part of legacy codes has been verified that it won't imp

Repository: incubator-hawq
Updated Branches:
  refs/heads/master c72e58946 -> b6391f191


HAWQ-1604. Add a new GUC, hawq_hashjoin_bloomfilter, to indicate whether to use a Bloom filter for hash join.
Remove gp_hashjoin_bloomfilter and the Bloom filter in the hash join table; this legacy code has been verified not to improve hash join performance.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/b6391f19
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/b6391f19
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/b6391f19

Branch: refs/heads/master
Commit: b6391f19163e5e332a870c652c2f4327ed861b68
Parents: c72e589
Author: Wen Lin <wl...@pivotal.io>
Authored: Sun Apr 8 18:10:12 2018 +0800
Committer: Wen Lin <wl...@pivotal.io>
Committed: Sun Apr 8 18:10:12 2018 +0800

----------------------------------------------------------------------
 src/backend/cdb/cdbvars.c         |  2 +-
 src/backend/executor/nodeHash.c   | 23 +++--------------------
 src/backend/utils/misc/guc.c      |  8 ++++----
 src/include/cdb/cdbvars.h         |  2 +-
 src/include/executor/hashjoin.h   |  2 +-
 src/test/unit/mock/mock_info.json |  4 ----
 6 files changed, 10 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/backend/cdb/cdbvars.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c
index c2fca21..d8e8552 100644
--- a/src/backend/cdb/cdbvars.c
+++ b/src/backend/cdb/cdbvars.c
@@ -234,7 +234,7 @@ int 		gp_hashagg_spillbatch_min = 0;
 int 		gp_hashagg_spillbatch_max = 0;
 
 /* hash join to use bloom filter: default to 0, means not used */
-int 	 	gp_hashjoin_bloomfilter = 0;
+int 	 	hawq_hashjoin_bloomfilter = 0;
 
 /* Analyzing aid */
 int 		gp_motion_slice_noop = 0;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/backend/executor/nodeHash.c
----------------------------------------------------------------------
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index dd63305..a461598 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -77,8 +77,6 @@ void ExecChooseHashTableSize(double ntuples, int tupwidth,
 						uint64 operatorMemKB
 						);
 
-#define BLOOMVAL(hk)  (((uint64)1) << (((hk) >> 13) & 0x3f))
-
 /* Amount of metadata memory required per batch */
 #define MD_MEM_PER_BATCH 	(sizeof(HashJoinBatchData *) + sizeof(HashJoinBatchData))
 
@@ -323,7 +321,6 @@ ExecHashTableCreate(HashState *hashState, HashJoinState *hjstate, List *hashOper
 	 */
 	hashtable = (HashJoinTable)palloc0(sizeof(HashJoinTableData));
 	hashtable->buckets = NULL;
-	hashtable->bloom = NULL;
 	hashtable->curbatch = 0;
 	hashtable->growEnabled = true;
 	hashtable->totalTuples = 0;
@@ -455,9 +452,6 @@ ExecHashTableCreate(HashState *hashState, HashJoinState *hjstate, List *hashOper
 	hashtable->buckets = (HashJoinTuple *)
 		palloc0(nbuckets * sizeof(HashJoinTuple));
 
-	if(gp_hashjoin_bloomfilter!=0)
-		hashtable->bloom = (uint64*) palloc0(nbuckets * sizeof(uint64));
-
 	MemoryContextSwitchTo(oldcxt);
 	}
 	END_MEMORY_ACCOUNT();
@@ -792,7 +786,6 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
 	{
 		HashJoinTuple prevtuple;
 		HashJoinTuple tuple;
-		uint64 bloom = 0;
 
 		prevtuple = NULL;
 		tuple = hashtable->buckets[i];
@@ -812,7 +805,6 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
 			{
 				/* keep tuple */
 				prevtuple = tuple;
-				bloom |= BLOOMVAL(tuple->hashvalue);
 			}
 			else
 			{
@@ -846,9 +838,6 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
 
 			tuple = nexttuple;
 		}
-
-		if(gp_hashjoin_bloomfilter!=0)
-			hashtable->bloom[i] = bloom;
 	}
 
 #ifdef HJDEBUG
@@ -989,9 +978,6 @@ ExecHashTableInsert(HashState *hashState, HashJoinTable hashtable,
 		hashtable->buckets[bucketno] = hashTuple;
 		hashtable->totalTuples += 1;
 
-		if(gp_hashjoin_bloomfilter!=0)
-			hashtable->bloom[bucketno] |= BLOOMVAL(hashvalue);
-
 		/* Double the number of batches when too much data in hash table. */
 		if (batch->innerspace > hashtable->spaceAllowed ||
 			batch->innertuples > UINT_MAX/2)
@@ -1195,12 +1181,12 @@ ExecScanHashBucket(HashState *hashState, HashJoinState *hjstate,
 	 */
 	if (hashTuple == NULL)
 	{
-		/* if bloom filter fails, then no match - don't even bother to scan */
-		if (gp_hashjoin_bloomfilter == 0 || 0 != (hashtable->bloom[hjstate->hj_CurBucketNo] & BLOOMVAL(hashvalue)))
-			hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
+		hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
 	}
 	else
+	{
 		hashTuple = hashTuple->next;
+	}
 
 	while (hashTuple != NULL)
 	{
@@ -1263,9 +1249,6 @@ ExecHashTableReset(HashState *hashState, HashJoinTable hashtable)
 	hashtable->buckets = (HashJoinTuple *)
 		palloc0(nbuckets * sizeof(HashJoinTuple));
 
-	if(gp_hashjoin_bloomfilter != 0)
-		hashtable->bloom = (uint64*) palloc0(nbuckets * sizeof(uint64));
-
 	hashtable->batches[hashtable->curbatch]->innerspace = 0;
 	hashtable->batches[hashtable->curbatch]->innertuples = 0;
 	hashtable->totalTuples = 0;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/backend/utils/misc/guc.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ac29d87..64449da 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -5834,13 +5834,13 @@ static struct config_int ConfigureNamesInt[] =
 	},
 
 	{
-		{"gp_hashjoin_bloomfilter", PGC_USERSET, GP_ARRAY_TUNING,
+		{"hawq_hashjoin_bloomfilter", PGC_USERSET, GP_ARRAY_TUNING,
 		 gettext_noop("Use bloomfilter in hash join"),
-		 gettext_noop("Use bloomfilter may speed up hashtable probing"),
+		 gettext_noop("Use bloomfilter may speed up hash join performance"),
 		 GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL | GUC_GPDB_ADDOPT
 		},
-		&gp_hashjoin_bloomfilter,
-		1, 0, 1, NULL, NULL
+		&hawq_hashjoin_bloomfilter,
+		0, 0, 1, NULL, NULL
 	},
 
 	{

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/include/cdb/cdbvars.h
----------------------------------------------------------------------
diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h
index 9f6c3b1..7ce988a 100644
--- a/src/include/cdb/cdbvars.h
+++ b/src/include/cdb/cdbvars.h
@@ -937,7 +937,7 @@ extern int gp_hashagg_spillbatch_min;
 extern int gp_hashagg_spillbatch_max;
 
 /* Hashjoin use bloom filter */
-extern int gp_hashjoin_bloomfilter;
+extern int hawq_hashjoin_bloomfilter;
 
 /* Get statistics for partitioned parent from a child */
 extern bool 	gp_statistics_pullup_from_child_partition;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/include/executor/hashjoin.h
----------------------------------------------------------------------
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 9776eb0..208aed6 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -167,7 +167,7 @@ typedef struct HashJoinTableData
 	int			nbuckets;		/* # buckets in the in-memory hash table */
 	/* buckets[i] is head of list of tuples in i'th in-memory bucket */
 	struct HashJoinTupleData **buckets;
-	uint64     				  *bloom; /* bloom[i] is bloomfilter for buckets[i] */
+
 	/* buckets array is per-batch storage, as are all the tuples */
 
 	int			nbatch;			/* number of batches */

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/test/unit/mock/mock_info.json
----------------------------------------------------------------------
diff --git a/src/test/unit/mock/mock_info.json b/src/test/unit/mock/mock_info.json
index 9198cf1..315c351 100644
--- a/src/test/unit/mock/mock_info.json
+++ b/src/test/unit/mock/mock_info.json
@@ -13929,10 +13929,6 @@
             "filename": "src/backend/cdb/cdbvars.c", 
             "header filename": "src/include/cdb/cdbvars.h"
         }, 
-        "gp_hashjoin_bloomfilter": {
-            "filename": "src/backend/cdb/cdbvars.c", 
-            "header filename": "src/include/cdb/cdbvars.h"
-        }, 
         "gp_hashjoin_tuples_per_bucket": {
             "filename": "src/backend/cdb/cdbvars.c", 
             "header filename": "src/include/cdb/cdbvars.h"