You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by hu...@apache.org on 2015/12/08 03:00:13 UTC
incubator-hawq git commit: HAWQ-227. Data locality downgrade by wrong insert host.
Repository: incubator-hawq
Updated Branches:
refs/heads/master afd0e554c -> 72b349b4b
HAWQ-227. Data locality downgrade by wrong insert host.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/72b349b4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/72b349b4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/72b349b4
Branch: refs/heads/master
Commit: 72b349b4bb6421e97ad2b838344a4d1b5be475fc
Parents: afd0e55
Author: hubertzhang <hz...@pivotal.io>
Authored: Tue Dec 8 09:57:43 2015 +0800
Committer: hubertzhang <hz...@pivotal.io>
Committed: Tue Dec 8 09:57:43 2015 +0800
----------------------------------------------------------------------
src/backend/cdb/cdbdatalocality.c | 68 ++++++++++++++++++----------------
1 file changed, 36 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/72b349b4/src/backend/cdb/cdbdatalocality.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbdatalocality.c b/src/backend/cdb/cdbdatalocality.c
index 3e1cee7..3cacd74 100644
--- a/src/backend/cdb/cdbdatalocality.c
+++ b/src/backend/cdb/cdbdatalocality.c
@@ -2659,42 +2659,46 @@ static void allocate_random_relation(Relation_Data* rel_data,
/*find the insert node for each block*/
- int *hostOccurTimes = (int *) palloc(
- sizeof(int) * context->dds_context.size);
+ int *hostOccurTimes = (int *) palloc(sizeof(int) * context->dds_context.size);
for (int fi = 0; fi < fileCount; fi++) {
- Relation_File *rel_file = file_vector[fi];
- /*for hash file whose bucket number doesn't equal to segment number*/
- if (rel_file->hostIDs == NULL) {
- rel_file->splits[0].host = 0;
- continue;
- }
- MemSet(hostOccurTimes, 0, sizeof(int) * context->dds_context.size);
- for (i = 0; i < rel_file->split_num; i++) {
- Block_Host_Index *hostID = rel_file->hostIDs + i;
- for (int l = 0; l < hostID->replica_num; l++) {
- uint32_t key = hostID->hostIndex[l];
- hostOccurTimes[key]++;
- }
+ Relation_File *rel_file = file_vector[fi];
+ /*for hash file whose bucket number doesn't equal to segment number*/
+ if (rel_file->hostIDs == NULL) {
+ rel_file->splits[0].host = 0;
+ continue;
+ }
+ MemSet(hostOccurTimes, 0, sizeof(int) * context->dds_context.size);
+ for (i = 0; i < rel_file->split_num; i++) {
+ Block_Host_Index *hostID = rel_file->hostIDs + i;
+ for (int l = 0; l < hostID->replica_num; l++) {
+ uint32_t key = hostID->hostIndex[l];
+ hostOccurTimes[key]++;
}
- int maxOccurTime = -1;
- int inserthost = -1;
- for(int i=0;i< context->dds_context.size;i++){
- if(hostOccurTimes[i] > maxOccurTime){
- maxOccurTime = hostOccurTimes[i];
- inserthost = i;
- }
+ }
+ int maxOccurTime = -1;
+ int inserthost = -1;
+ int hostsWithSameOccurTimesExist = true;
+ for (int i = 0; i < context->dds_context.size; i++) {
+ if (hostOccurTimes[i] > maxOccurTime) {
+ maxOccurTime = hostOccurTimes[i];
+ inserthost = i;
+ hostsWithSameOccurTimesExist = false;
+ } else if (hostOccurTimes[i] == maxOccurTime) {
+ hostsWithSameOccurTimesExist = true;
}
+ }
- /* currently we consider the insert hosts are the same for all the blocks in the same file.
- * this logic can be changed in future, so we store the state in block level not file level*/
- if(maxOccurTime < rel_file->split_num){
- inserthost = -1;
- }else{
- for (i = 0; i < rel_file->split_num; i++) {
- Block_Host_Index *hostID = rel_file->hostIDs + i;
- hostID->insertHost = inserthost;
- }
- }
+ /* currently we consider the insert hosts are the same for all the blocks in the same file.
+ * this logic can be changed in future, so we store the state in block level not file level
+ * if hostsWithSameOccurTimesExist we cannot determine which is insert host
+ * if maxOccurTime <2 we cannot determine which is insert host either*/
+ if (maxOccurTime < rel_file->split_num || maxOccurTime < 2 || hostsWithSameOccurTimesExist) {
+ inserthost = -1;
+ }
+ for (i = 0; i < rel_file->split_num; i++) {
+ Block_Host_Index *hostID = rel_file->hostIDs + i;
+ hostID->insertHost = inserthost;
+ }
}
pfree(hostOccurTimes);