Posted to commits@trafodion.apache.org by su...@apache.org on 2017/02/23 17:20:07 UTC
[2/4] incubator-trafodion git commit: TRAFODION-2492 Support for native hive views, plus few more changes
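
Note: the functional core of this change is the conversion of Hive's expanded view text into a Trafodion CREATE VIEW statement, done in the NATable constructor for hive tables further down in the diff. Below is a minimal standalone sketch of that text rewrite, using plain std::string and an illustrative view name v1 and sample view text (the commit itself uses NAString and a replaceAll helper inside NATable, so names and types here are assumptions for illustration only):

    // Sketch only: Hive's expanded view text quotes identifiers with back
    // quotes (`) and spells the default schema as `default`; Trafodion refers
    // to that schema as "hive" and uses no back quotes, then wraps the result
    // in a CREATE VIEW statement, as the diff below does for viewText_.
    #include <iostream>
    #include <string>

    static std::string replaceAll(std::string text,
                                  const std::string &from,
                                  const std::string &to)
    {
      // Replace every occurrence of 'from' with 'to'.
      for (std::string::size_type pos = text.find(from);
           pos != std::string::npos;
           pos = text.find(from, pos + to.length()))
        text.replace(pos, from.length(), to);
      return text;
    }

    int main()
    {
      // Hypothetical expanded view text as Hive would return it.
      std::string hiveExpandedText = "SELECT `t`.`a` FROM `default`.`t`";

      // hive "default" schema is referred to as "hive" in Trafodion.
      std::string trafText = replaceAll(hiveExpandedText, "`default`", "hive");

      // Drop the remaining back quotes (the commit replaces them with "").
      trafText = replaceAll(trafText, "`", "");

      // Wrap it the way the commit builds viewText_.
      std::string createViewStmt = "CREATE VIEW v1 AS " + trafText + ";";

      std::cout << createViewStmt << std::endl;
      // Prints: CREATE VIEW v1 AS SELECT t.a FROM hive.t;
      return 0;
    }

The sketch mirrors the two substitutions the diff performs on viewExpandedText before storing it: `default` becomes the hive schema name and the remaining back quotes are stripped, after which the text is prefixed with CREATE VIEW and terminated with a semicolon.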
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/4c3077ce/core/sql/optimizer/NATable.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/NATable.cpp b/core/sql/optimizer/NATable.cpp
index 0918b6e..0e4bd31 100644
--- a/core/sql/optimizer/NATable.cpp
+++ b/core/sql/optimizer/NATable.cpp
@@ -4848,6 +4848,7 @@ NABoolean createNAFileSets(TrafDesc * table_desc /*IN*/,
viewTextCharSet_(CharInfo::UnknownCharSet),
viewCheck_(NULL),
viewColUsages_(NULL),
+ hiveOrigViewText_(NULL),
flags_(IS_INSERTABLE | IS_UPDATABLE),
insertMode_(COM_REGULAR_TABLE_INSERT_MODE),
isSynonymTranslationDone_(FALSE),
@@ -4963,36 +4964,36 @@ NABoolean createNAFileSets(TrafDesc * table_desc /*IN*/,
// Check if the synonym name translation to reference object has been done.
if (table_desc->tableDesc()->isSynonymTranslationDone())
- {
- isSynonymTranslationDone_ = TRUE;
- NAString synonymReferenceName(table_desc->tableDesc()->tablename);
- synonymReferenceName_ = synonymReferenceName;
- ComUID uid(table_desc->tableDesc()->objectUID);
- synonymReferenceObjectUid_ = uid;
- }
+ {
+ isSynonymTranslationDone_ = TRUE;
+ NAString synonymReferenceName(table_desc->tableDesc()->tablename);
+ synonymReferenceName_ = synonymReferenceName;
+ ComUID uid(table_desc->tableDesc()->objectUID);
+ synonymReferenceObjectUid_ = uid;
+ }
// Check if it is a UMD table, or SMD table or MV related UMD object
// and set cll correcsponding flags to indicate this.
if (table_desc->tableDesc()->isUMDTable())
- {
- isUMDTable_ = TRUE;
- }
+ {
+ isUMDTable_ = TRUE;
+ }
if (table_desc->tableDesc()->isSystemTableCode())
- {
- isSMDTable_ = TRUE;
- }
+ {
+ isSMDTable_ = TRUE;
+ }
if (table_desc->tableDesc()->isMVMetadataObject())
- {
- isMVUMDTable_ = TRUE;
- }
+ {
+ isMVUMDTable_ = TRUE;
+ }
isTrigTempTable_ = (qualifiedName_.getSpecialType() == ExtendedQualName::TRIGTEMP_TABLE);
if (table_desc->tableDesc()->isVolatileTable())
- {
- setVolatileTable( TRUE );
- }
+ {
+ setVolatileTable( TRUE );
+ }
switch(table_desc->tableDesc()->rowFormat())
{
@@ -5008,541 +5009,546 @@ NABoolean createNAFileSets(TrafDesc * table_desc /*IN*/,
}
if (table_desc->tableDesc()->isInMemoryObject())
- {
- setInMemoryObjectDefn( TRUE );
- }
+ {
+ setInMemoryObjectDefn( TRUE );
+ }
if (table_desc->tableDesc()->isDroppable())
- {
- setDroppableTable( TRUE );
- }
+ {
+ setDroppableTable( TRUE );
+ }
if (corrName.isExternal())
- {
- setIsExternalTable(TRUE);
- }
+ {
+ setIsExternalTable(TRUE);
+ }
if (qualifiedName_.getQualifiedNameObj().isHistograms() ||
qualifiedName_.getQualifiedNameObj().isHistogramIntervals())
- {
- setIsHistogramTable(TRUE);
- }
+ {
+ setIsHistogramTable(TRUE);
+ }
- insertMode_ = table_desc->tableDesc()->insertMode();
+ insertMode_ = table_desc->tableDesc()->insertMode();
- setRecordLength(table_desc->tableDesc()->record_length);
- //
- // Add timestamp information.
- //
- createTime_ = table_desc->tableDesc()->createTime;
- redefTime_ = table_desc->tableDesc()->redefTime;
- cacheTime_ = table_desc->tableDesc()->cacheTime;
+ setRecordLength(table_desc->tableDesc()->record_length);
+ //
+ // Add timestamp information.
+ //
+ createTime_ = table_desc->tableDesc()->createTime;
+ redefTime_ = table_desc->tableDesc()->redefTime;
+ cacheTime_ = table_desc->tableDesc()->cacheTime;
- catalogUID_ = table_desc->tableDesc()->catUID;
- schemaUID_ = table_desc->tableDesc()->schemaUID;
- objectUID_ = table_desc->tableDesc()->objectUID;
+ catalogUID_ = table_desc->tableDesc()->catUID;
+ schemaUID_ = table_desc->tableDesc()->schemaUID;
+ objectUID_ = table_desc->tableDesc()->objectUID;
- // Set the objectUID_ for hbase Cell and Row tables, if the table has
- // been defined in Trafodion use this value, otherwise, set to 0
- if (isHbaseCell_ || isHbaseRow_)
- {
- if ( !fetchObjectUIDForNativeTable(corrName) )
- return;
+ // Set the objectUID_ for hbase Cell and Row tables, if the table has
+ // been defined in Trafodion use this value, otherwise, set to 0
+ if (isHbaseCell_ || isHbaseRow_)
+ {
+ if ( !fetchObjectUIDForNativeTable(corrName) )
+ return;
- if (objectUID_ > 0 )
- setHasExternalTable(TRUE);
- }
+ if (objectUID_ > 0 )
+ setHasExternalTable(TRUE);
+ }
- if (table_desc->tableDesc()->owner)
- {
- Int32 userInfo (table_desc->tableDesc()->owner);
- owner_ = userInfo;
- }
- if (table_desc->tableDesc()->schemaOwner)
- {
- Int32 schemaUser(table_desc->tableDesc()->schemaOwner);
- schemaOwner_ = schemaUser;
- }
+ if (table_desc->tableDesc()->owner)
+ {
+ Int32 userInfo (table_desc->tableDesc()->owner);
+ owner_ = userInfo;
+ }
+ if (table_desc->tableDesc()->schemaOwner)
+ {
+ Int32 schemaUser(table_desc->tableDesc()->schemaOwner);
+ schemaOwner_ = schemaUser;
+ }
- objectType_ = table_desc->tableDesc()->objectType();
- partitioningScheme_ = table_desc->tableDesc()->partitioningScheme();
+ objectType_ = table_desc->tableDesc()->objectType();
+ partitioningScheme_ = table_desc->tableDesc()->partitioningScheme();
- // Set up privs
- if ((corrName.getSpecialType() == ExtendedQualName::SG_TABLE) ||
- (!(corrName.isSeabaseMD() || corrName.isSpecialTable())))
+ // Set up privs
+ if ((corrName.getSpecialType() == ExtendedQualName::SG_TABLE) ||
+ (!(corrName.isSeabaseMD() || corrName.isSpecialTable())))
getPrivileges(table_desc->tableDesc()->priv_desc);
- if ((table_desc->tableDesc()->objectFlags & SEABASE_OBJECT_IS_EXTERNAL_HIVE) != 0 ||
- (table_desc->tableDesc()->objectFlags & SEABASE_OBJECT_IS_EXTERNAL_HBASE) != 0)
- setIsExternalTable(TRUE);
-
- if (CmpSeabaseDDL::isMDflagsSet
- (table_desc->tableDesc()->tablesFlags, MD_TABLES_HIVE_EXT_COL_ATTRS))
- setHiveExtColAttrs(TRUE);
- if (CmpSeabaseDDL::isMDflagsSet
- (table_desc->tableDesc()->tablesFlags, MD_TABLES_HIVE_EXT_KEY_ATTRS))
- setHiveExtKeyAttrs(TRUE);
+ if ((table_desc->tableDesc()->objectFlags & SEABASE_OBJECT_IS_EXTERNAL_HIVE) != 0 ||
+ (table_desc->tableDesc()->objectFlags & SEABASE_OBJECT_IS_EXTERNAL_HBASE) != 0)
+ {
+ setIsExternalTable(TRUE);
- if (table_desc->tableDesc()->snapshotName)
- {
- snapshotName_ =
- new(heap_) char[strlen(table_desc->tableDesc()->snapshotName) + 1];
- strcpy(snapshotName_, table_desc->tableDesc()->snapshotName);
- }
+ if (table_desc->tableDesc()->objectFlags & SEABASE_OBJECT_IS_IMPLICIT_EXTERNAL_HIVE)
+ setIsImplicitExternalTable(TRUE);
+ }
- if (table_desc->tableDesc()->default_col_fam)
- defaultColFam_ = table_desc->tableDesc()->default_col_fam;
+ if (CmpSeabaseDDL::isMDflagsSet
+ (table_desc->tableDesc()->tablesFlags, MD_TABLES_HIVE_EXT_COL_ATTRS))
+ setHiveExtColAttrs(TRUE);
+ if (CmpSeabaseDDL::isMDflagsSet
+ (table_desc->tableDesc()->tablesFlags, MD_TABLES_HIVE_EXT_KEY_ATTRS))
+ setHiveExtKeyAttrs(TRUE);
- if (table_desc->tableDesc()->all_col_fams)
- {
- // Space delimited col families.
+ if (table_desc->tableDesc()->snapshotName)
+ {
+ snapshotName_ =
+ new(heap_) char[strlen(table_desc->tableDesc()->snapshotName) + 1];
+ strcpy(snapshotName_, table_desc->tableDesc()->snapshotName);
+ }
- string buf; // Have a buffer string
- stringstream ss(table_desc->tableDesc()->all_col_fams); // Insert the string into a stream
+ if (table_desc->tableDesc()->default_col_fam)
+ defaultColFam_ = table_desc->tableDesc()->default_col_fam;
- while (ss >> buf)
- {
- allColFams_.insert(buf.c_str());
- }
- }
- else
- allColFams_.insert(defaultColFam_);
+ if (table_desc->tableDesc()->all_col_fams)
+ {
+ // Space delimited col families.
- TrafDesc * files_desc = table_desc->tableDesc()->files_desc;
+ string buf; // Have a buffer string
+ stringstream ss(table_desc->tableDesc()->all_col_fams); // Insert the string into a stream
- // Some objects don't have a file_desc set up (e.g. views)
- // Therefore, only setup the partnsDesc_ if this is a partitionable object
- if (files_desc)
- {
- if (files_desc->filesDesc()->partns_desc)
- partnsDesc_ = files_desc->filesDesc()->partns_desc;
- }
- else
- partnsDesc_ = NULL;
+ while (ss >> buf)
+ {
+ allColFams_.insert(buf.c_str());
+ }
+ }
+ else
+ allColFams_.insert(defaultColFam_);
- //
- // Insert a NAColumn in the colArray_ for this NATable for each
- // columns_desc from the ARK SMD. Returns TRUE if error creating NAColumns.
- //
- if (createNAColumns(table_desc->tableDesc()->columns_desc,
- this,
- colArray_ /*OUT*/,
- heap_))
- //coverity[leaked_storage]
- return; // colcount_ == 0 indicates an error
+ TrafDesc * files_desc = table_desc->tableDesc()->files_desc;
- //
- // Add view information, if this is a view
- //
- TrafDesc *view_desc = table_desc->tableDesc()->views_desc;
- if (view_desc)
- {
- viewText_ = new (heap_) char[strlen(view_desc->viewDesc()->viewtext) + 2];
- strcpy(viewText_, view_desc->viewDesc()->viewtext);
- strcat(viewText_, ";");
-
- viewTextCharSet_ = (CharInfo::CharSet)view_desc->viewDesc()->viewtextcharset;
-
- viewCheck_ = NULL; //initialize
- if(view_desc->viewDesc()->viewchecktext){
- UInt32 viewCheckLength = str_len(view_desc->viewDesc()->viewchecktext)+1;
- viewCheck_ = new (heap_) char[ viewCheckLength];
- memcpy(viewCheck_, view_desc->viewDesc()->viewchecktext,
- viewCheckLength);
- }
+ // Some objects don't have a file_desc set up (e.g. views)
+ // Therefore, only setup the partnsDesc_ if this is a partitionable object
+ if (files_desc)
+ {
+ if (files_desc->filesDesc()->partns_desc)
+ partnsDesc_ = files_desc->filesDesc()->partns_desc;
+ }
+ else
+ partnsDesc_ = NULL;
- viewColUsages_ = NULL;
- if(view_desc->viewDesc()->viewcolusages){
- viewColUsages_ = new (heap_) NAList<ComViewColUsage *>(heap_); //initialize empty list
- char * beginStr (view_desc->viewDesc()->viewcolusages);
- char * endStr = strchr(beginStr, ';');
- while (endStr != NULL) {
- ComViewColUsage *colUsage = new (heap_) ComViewColUsage;
- NAString currentUsage(beginStr, endStr - beginStr + 1);
- colUsage->unpackUsage (currentUsage.data());
- viewColUsages_->insert(colUsage);
- beginStr = endStr+1;
- endStr = strchr(beginStr, ';');
- }
- }
- setUpdatable(view_desc->viewDesc()->isUpdatable());
- setInsertable(view_desc->viewDesc()->isInsertable());
+ //
+ // Insert a NAColumn in the colArray_ for this NATable for each
+ // columns_desc from the ARK SMD. Returns TRUE if error creating NAColumns.
+ //
+ if (createNAColumns(table_desc->tableDesc()->columns_desc,
+ this,
+ colArray_ /*OUT*/,
+ heap_))
+ //coverity[leaked_storage]
+ return; // colcount_ == 0 indicates an error
- //
- // The updatable flag is false for an MP view only if it is NOT a
- // protection view. Therefore updatable == FALSE iff it is a
- // shorthand view. See ReadTableDef.cpp, l. 3379.
- //
+ //
+ // Add view information, if this is a view
+ //
+ TrafDesc *view_desc = table_desc->tableDesc()->views_desc;
+ if (view_desc)
+ {
+ viewText_ = new (heap_) char[strlen(view_desc->viewDesc()->viewtext) + 2];
+ strcpy(viewText_, view_desc->viewDesc()->viewtext);
+ strcat(viewText_, ";");
+
+ viewTextCharSet_ = (CharInfo::CharSet)view_desc->viewDesc()->viewtextcharset;
+
+ viewCheck_ = NULL; //initialize
+ if(view_desc->viewDesc()->viewchecktext){
+ UInt32 viewCheckLength = str_len(view_desc->viewDesc()->viewchecktext)+1;
+ viewCheck_ = new (heap_) char[ viewCheckLength];
+ memcpy(viewCheck_, view_desc->viewDesc()->viewchecktext,
+ viewCheckLength);
+ }
- viewFileName_ = NULL;
- CMPASSERT(view_desc->viewDesc()->viewfilename);
- UInt32 viewFileNameLength = str_len(view_desc->viewDesc()->viewfilename) + 1;
- viewFileName_ = new (heap_) char[viewFileNameLength];
- memcpy(viewFileName_, view_desc->viewDesc()->viewfilename,
- viewFileNameLength);
- }
- else
- {
- //keep track of memory used by NAFileSets
- Lng32 preCreateNAFileSetsMemSize = heap_->getAllocSize();
+ viewColUsages_ = NULL;
+ if(view_desc->viewDesc()->viewcolusages){
+ viewColUsages_ = new (heap_) NAList<ComViewColUsage *>(heap_); //initialize empty list
+ char * beginStr (view_desc->viewDesc()->viewcolusages);
+ char * endStr = strchr(beginStr, ';');
+ while (endStr != NULL) {
+ ComViewColUsage *colUsage = new (heap_) ComViewColUsage;
+ NAString currentUsage(beginStr, endStr - beginStr + 1);
+ colUsage->unpackUsage (currentUsage.data());
+ viewColUsages_->insert(colUsage);
+ beginStr = endStr+1;
+ endStr = strchr(beginStr, ';');
+ }
+ }
+ setUpdatable(view_desc->viewDesc()->isUpdatable());
+ setInsertable(view_desc->viewDesc()->isInsertable());
- //
- // Process indexes and vertical partitions for this table.
- //
- if (createNAFileSets(table_desc /*IN*/,
- this /*IN*/,
- colArray_ /*IN*/,
- indexes_ /*OUT*/,
- vertParts_ /*OUT*/,
- clusteringIndex_ /*OUT*/,
- tableIdList_ /*OUT*/,
- heap_,
- bindWA,
- newColumns_, /*OUT*/
- maxIndexLevelsPtr)) {
- return; // colcount_ == 0 indicates an error
- }
+ //
+ // The updatable flag is false for an MP view only if it is NOT a
+ // protection view. Therefore updatable == FALSE iff it is a
+ // shorthand view. See ReadTableDef.cpp, l. 3379.
+ //
- // Add constraint info.
- //
- // This call to createConstraintInfo, calls the parser on
- // the constraint name
- //
+ viewFileName_ = NULL;
+ CMPASSERT(view_desc->viewDesc()->viewfilename);
+ UInt32 viewFileNameLength = str_len(view_desc->viewDesc()->viewfilename) + 1;
+ viewFileName_ = new (heap_) char[viewFileNameLength];
+ memcpy(viewFileName_, view_desc->viewDesc()->viewfilename,
+ viewFileNameLength);
+ }
+ else
+ {
+ //keep track of memory used by NAFileSets
+ Lng32 preCreateNAFileSetsMemSize = heap_->getAllocSize();
- NABoolean errorOccurred =
- createConstraintInfo(table_desc /*IN*/,
- getTableName() /*IN*/,
- getNAColumnArray()/*IN (some columns updated)*/,
- checkConstraints_ /*OUT*/,
- uniqueConstraints_/*OUT*/,
- refConstraints_ /*OUT*/,
- heap_,
- bindWA);
-
- if (errorOccurred) {
- // return before setting colcount_, indicating that there
- // was an error in constructing this NATable.
- //
- return;
- }
+ //
+ // Process indexes and vertical partitions for this table.
+ //
+ if (createNAFileSets(table_desc /*IN*/,
+ this /*IN*/,
+ colArray_ /*IN*/,
+ indexes_ /*OUT*/,
+ vertParts_ /*OUT*/,
+ clusteringIndex_ /*OUT*/,
+ tableIdList_ /*OUT*/,
+ heap_,
+ bindWA,
+ newColumns_, /*OUT*/
+ maxIndexLevelsPtr)) {
+ return; // colcount_ == 0 indicates an error
+ }
- //
- // FetchHistograms call used to be here -- moved to getStatistics().
- //
- }
+ // Add constraint info.
+ //
+ // This call to createConstraintInfo, calls the parser on
+ // the constraint name
+ //
- // change partFunc for base table if PARTITION clause has been used
- // to limit the number of partitions that will be accessed.
- if ((qualifiedName_.isPartitionNameSpecified()) ||
- (qualifiedName_.isPartitionRangeSpecified())) {
- if (filterUnusedPartitions(corrName.getPartnClause())) {
- return ;
- }
- }
+ NABoolean errorOccurred =
+ createConstraintInfo(table_desc /*IN*/,
+ getTableName() /*IN*/,
+ getNAColumnArray()/*IN (some columns updated)*/,
+ checkConstraints_ /*OUT*/,
+ uniqueConstraints_/*OUT*/,
+ refConstraints_ /*OUT*/,
+ heap_,
+ bindWA);
+
+ if (errorOccurred) {
+ // return before setting colcount_, indicating that there
+ // was an error in constructing this NATable.
+ //
+ return;
+ }
- //
- // Set colcount_ after all possible errors (Binder uses nonzero colcount
- // as an indicator of valid table definition).
- //
- CMPASSERT(table_desc->tableDesc()->colcount >= 0); // CollIndex cast ok?
- colcount_ = (CollIndex)table_desc->tableDesc()->colcount;
+ //
+ // FetchHistograms call used to be here -- moved to getStatistics().
+ //
+ }
- // If there is a host variable associated with this table, store it
- // for use by the generator to generate late-name resolution information.
- //
- HostVar *hv = corrName.getPrototype();
- prototype_ = hv ? new (heap_) HostVar(*hv) : NULL;
+ // change partFunc for base table if PARTITION clause has been used
+ // to limit the number of partitions that will be accessed.
+ if ((qualifiedName_.isPartitionNameSpecified()) ||
+ (qualifiedName_.isPartitionRangeSpecified())) {
+ if (filterUnusedPartitions(corrName.getPartnClause())) {
+ return ;
+ }
+ }
- // MV
- // Initialize the MV support data members
- isAnMV_ = table_desc->tableDesc()->isMVTable();
- isAnMVMetaData_ = table_desc->tableDesc()->isMVMetadataObject();
- mvAttributeBitmap_.initBitmap(table_desc->tableDesc()->mvAttributesBitmap);
+ //
+ // Set colcount_ after all possible errors (Binder uses nonzero colcount
+ // as an indicator of valid table definition).
+ //
+ CMPASSERT(table_desc->tableDesc()->colcount >= 0); // CollIndex cast ok?
+ colcount_ = (CollIndex)table_desc->tableDesc()->colcount;
- TrafDesc *mvs_desc = NULL; // using mvs not set or returned for traf tables
- // Memory Leak
- while (mvs_desc)
- {
- TrafUsingMvDesc* mv = mvs_desc->usingMvDesc();
+ // If there is a host variable associated with this table, store it
+ // for use by the generator to generate late-name resolution information.
+ //
+ HostVar *hv = corrName.getPrototype();
+ prototype_ = hv ? new (heap_) HostVar(*hv) : NULL;
+
+ // MV
+ // Initialize the MV support data members
+ isAnMV_ = table_desc->tableDesc()->isMVTable();
+ isAnMVMetaData_ = table_desc->tableDesc()->isMVMetadataObject();
+ mvAttributeBitmap_.initBitmap(table_desc->tableDesc()->mvAttributesBitmap);
+
+ TrafDesc *mvs_desc = NULL; // using mvs not set or returned for traf tables
+ // Memory Leak
+ while (mvs_desc)
+ {
+ TrafUsingMvDesc* mv = mvs_desc->usingMvDesc();
- UsingMvInfo *usingMv = new(heap_)
- UsingMvInfo(mv->mvName, mv->refreshType(), mv->rewriteEnabled,
- mv->isInitialized, heap_);
- mvsUsingMe_.insert(usingMv);
+ UsingMvInfo *usingMv = new(heap_)
+ UsingMvInfo(mv->mvName, mv->refreshType(), mv->rewriteEnabled,
+ mv->isInitialized, heap_);
+ mvsUsingMe_.insert(usingMv);
- mvs_desc = mvs_desc->next;
- }
+ mvs_desc = mvs_desc->next;
+ }
- // ++MV
+ // ++MV
- // fix the special-type for MV objects. There are case where the type is
- // set to NORMAL_TABLE although this is an MV.
- //
- // Example:
- // --------
- // in the statement "select * from MV1" mv1 will have a NORMAL_TABLE
- // special-type, while in "select * from table(mv_table MV1)" it will
- // have the MV_TABLE special-type.
+ // fix the special-type for MV objects. There are case where the type is
+ // set to NORMAL_TABLE although this is an MV.
+ //
+ // Example:
+ // --------
+ // in the statement "select * from MV1" mv1 will have a NORMAL_TABLE
+ // special-type, while in "select * from table(mv_table MV1)" it will
+ // have the MV_TABLE special-type.
- if (isAnMV_)
- {
- switch(qualifiedName_.getSpecialType())
- {
- case ExtendedQualName::GHOST_TABLE:
- qualifiedName_.setSpecialType(ExtendedQualName::GHOST_MV_TABLE);
- break;
- case ExtendedQualName::GHOST_MV_TABLE:
- // Do not change it
- break;
- default:
- qualifiedName_.setSpecialType(ExtendedQualName::MV_TABLE);
- break;
- }
- }
+ if (isAnMV_)
+ {
+ switch(qualifiedName_.getSpecialType())
+ {
+ case ExtendedQualName::GHOST_TABLE:
+ qualifiedName_.setSpecialType(ExtendedQualName::GHOST_MV_TABLE);
+ break;
+ case ExtendedQualName::GHOST_MV_TABLE:
+ // Do not change it
+ break;
+ default:
+ qualifiedName_.setSpecialType(ExtendedQualName::MV_TABLE);
+ break;
+ }
+ }
- // --MV
+ // --MV
- // Initialize the sequence generator fields
- TrafDesc *sequence_desc = table_desc->tableDesc()->sequence_generator_desc;
- if (sequence_desc != NULL) {
- TrafSequenceGeneratorDesc *sg_desc = sequence_desc->sequenceGeneratorDesc();
+ // Initialize the sequence generator fields
+ TrafDesc *sequence_desc = table_desc->tableDesc()->sequence_generator_desc;
+ if (sequence_desc != NULL) {
+ TrafSequenceGeneratorDesc *sg_desc = sequence_desc->sequenceGeneratorDesc();
- if (sg_desc != NULL)
- {
- sgAttributes_ =
- new(heap_) SequenceGeneratorAttributes(
- sg_desc->startValue,
- sg_desc->increment,
- sg_desc->maxValue,
- sg_desc->minValue,
- sg_desc->sgType(),
- (ComSQLDataType)sg_desc->sqlDataType,
- (ComFSDataType)sg_desc->fsDataType,
- sg_desc->cycleOption,
- FALSE,
- sg_desc->objectUID,
- sg_desc->cache,
- sg_desc->nextValue,
- 0,
- sg_desc->redefTime);
- }
- }
+ if (sg_desc != NULL)
+ {
+ sgAttributes_ =
+ new(heap_) SequenceGeneratorAttributes(
+ sg_desc->startValue,
+ sg_desc->increment,
+ sg_desc->maxValue,
+ sg_desc->minValue,
+ sg_desc->sgType(),
+ (ComSQLDataType)sg_desc->sqlDataType,
+ (ComFSDataType)sg_desc->fsDataType,
+ sg_desc->cycleOption,
+ FALSE,
+ sg_desc->objectUID,
+ sg_desc->cache,
+ sg_desc->nextValue,
+ 0,
+ sg_desc->redefTime);
+ }
+ }
#ifndef NDEBUG
- if (getenv("NATABLE_DEBUG"))
- {
- cout << "NATable " << (void*)this << " "
- << qualifiedName_.getQualifiedNameObj().getQualifiedNameAsAnsiString() << " "
- << (Int32)qualifiedName_.getSpecialType() << endl;
- colArray_.print();
- }
+ if (getenv("NATABLE_DEBUG"))
+ {
+ cout << "NATable " << (void*)this << " "
+ << qualifiedName_.getQualifiedNameObj().getQualifiedNameAsAnsiString() << " "
+ << (Int32)qualifiedName_.getSpecialType() << endl;
+ colArray_.print();
+ }
#endif
- //this guy is cacheable
- if((qualifiedName_.isCacheable())&&
- (NOT (isHbaseTable())) &&
- //this object is not on the statement heap (i.e. it is being cached)
- ((heap_ != CmpCommon::statementHeap())||
- (OSIM_runningInCaptureMode())))
- {
- char * nodeName = NULL;
- char * catStr = NULL;
- char * schemaStr = NULL;
- char * fileStr = NULL;
- short nodeNameLen = 0;
- Int32 catStrLen = 0;
- Int32 schemaStrLen = 0;
- Int32 fileStrLen = 0;
+ //this guy is cacheable
+ if((qualifiedName_.isCacheable())&&
+ (NOT (isHbaseTable())) &&
+ //this object is not on the statement heap (i.e. it is being cached)
+ ((heap_ != CmpCommon::statementHeap())||
+ (OSIM_runningInCaptureMode())))
+ {
+ char * nodeName = NULL;
+ char * catStr = NULL;
+ char * schemaStr = NULL;
+ char * fileStr = NULL;
+ short nodeNameLen = 0;
+ Int32 catStrLen = 0;
+ Int32 schemaStrLen = 0;
+ Int32 fileStrLen = 0;
#ifdef NA_64BIT
- // dg64 - match signature
- int_32 primaryNodeNum=0;
+ // dg64 - match signature
+ int_32 primaryNodeNum=0;
#else
- Int32 primaryNodeNum=0;
+ Int32 primaryNodeNum=0;
#endif
- short error = 0;
+ short error = 0;
- //clusteringIndex has physical filename that can be used to check
- //if a catalog operation has been performed on a table.
- //Views don't have clusteringIndex, so we get physical filename
- //from the viewFileName_ datamember.
- if(viewText_)
- {
- //view filename starts with node name
- //filename is in format \<node_name>.$<volume>.<subvolume>.<file>
- //catStr => <volume>
- //schemaStr => <subvolume>
- //fileStr => <file>
- nodeName = viewFileName_;
- catStr = nodeName;
-
- //skip over node name
- //measure node name length
- //get to begining of volume name
- //Measure length of node name
- //skip over node name i.e. \MAYA, \AZTEC, etc
- //and get to volume name
- while((nodeName[nodeNameLen]!='.')&&
- (nodeNameLen < 8)){
- catStr++;
- nodeNameLen++;
- };
-
- //skip over '.' and the '$' in volume name
- catStr=&nodeName[nodeNameLen+2];
- schemaStr=catStr;
-
- //skip over the volume/catalog name
- //while measuring catalog name length
- while((catStr[catStrLen]!='.')&&
- (catStrLen < 8))
- {
- schemaStr++;
- catStrLen++;
- }
-
- //skip over the '.'
- schemaStr++;
- fileStr=schemaStr;
-
- //skip over the subvolume/schema name
- //while measuring schema name length
- while((schemaStr[schemaStrLen]!='.')&&
- (schemaStrLen < 8))
- {
- fileStr++;
- schemaStrLen++;
- }
-
- //skip over the '.'
- fileStr++;
- fileStrLen = str_len(fileStr);
-
- //figure out the node number for the node
- //which has the primary partition.
- primaryNodeNum=0;
-
- if(!OSIM_runningSimulation())
- primaryNodeNum = gpClusterInfo->mapNodeNameToNodeNum(NAString(nodeName));
- }
- else{
- //get qualified name of the clustering index which should
- //be the actual physical file name of the table
- const QualifiedName fileNameObj = getClusteringIndex()->
- getRandomPartition();
- const NAString fileName = fileNameObj.getObjectName();
-
- //get schemaName object
- const SchemaName schemaNameObj = fileNameObj.getSchemaName();
- const NAString schemaName = schemaNameObj.getSchemaName();
-
- //get catalogName object
- //this contains a string in the form \<node_name>.$volume
- const CatalogName catalogNameObj = fileNameObj.getCatalogName();
- const NAString catalogName = catalogNameObj.getCatalogName();
- nodeName = (char*) catalogName.data();
- catStr = nodeName;
-
- //Measure length of node name
- //skip over node name i.e. \MAYA, \AZTEC, etc
- //and get to volume name
- while((nodeName[nodeNameLen]!='.')&&
- (nodeNameLen < 8)){
- catStr++;
- nodeNameLen++;
- };
-
- //get volume/catalog name
- //skip ".$"
- catStr=&nodeName[nodeNameLen+2];
+ //clusteringIndex has physical filename that can be used to check
+ //if a catalog operation has been performed on a table.
+ //Views don't have clusteringIndex, so we get physical filename
+ //from the viewFileName_ datamember.
+ if(viewText_)
+ {
+ //view filename starts with node name
+ //filename is in format \<node_name>.$<volume>.<subvolume>.<file>
+ //catStr => <volume>
+ //schemaStr => <subvolume>
+ //fileStr => <file>
+ nodeName = viewFileName_;
+ catStr = nodeName;
+
+ //skip over node name
+ //measure node name length
+ //get to begining of volume name
+ //Measure length of node name
+ //skip over node name i.e. \MAYA, \AZTEC, etc
+ //and get to volume name
+ while((nodeName[nodeNameLen]!='.')&&
+ (nodeNameLen < 8)){
+ catStr++;
+ nodeNameLen++;
+ };
+
+ //skip over '.' and the '$' in volume name
+ catStr=&nodeName[nodeNameLen+2];
+ schemaStr=catStr;
+
+ //skip over the volume/catalog name
+ //while measuring catalog name length
+ while((catStr[catStrLen]!='.')&&
+ (catStrLen < 8))
+ {
+ schemaStr++;
+ catStrLen++;
+ }
+
+ //skip over the '.'
+ schemaStr++;
+ fileStr=schemaStr;
+
+ //skip over the subvolume/schema name
+ //while measuring schema name length
+ while((schemaStr[schemaStrLen]!='.')&&
+ (schemaStrLen < 8))
+ {
+ fileStr++;
+ schemaStrLen++;
+ }
+
+ //skip over the '.'
+ fileStr++;
+ fileStrLen = str_len(fileStr);
+
+ //figure out the node number for the node
+ //which has the primary partition.
+ primaryNodeNum=0;
+
+ if(!OSIM_runningSimulation())
+ primaryNodeNum = gpClusterInfo->mapNodeNameToNodeNum(NAString(nodeName));
+ }
+ else{
+ //get qualified name of the clustering index which should
+ //be the actual physical file name of the table
+ const QualifiedName fileNameObj = getClusteringIndex()->
+ getRandomPartition();
+ const NAString fileName = fileNameObj.getObjectName();
+
+ //get schemaName object
+ const SchemaName schemaNameObj = fileNameObj.getSchemaName();
+ const NAString schemaName = schemaNameObj.getSchemaName();
+
+ //get catalogName object
+ //this contains a string in the form \<node_name>.$volume
+ const CatalogName catalogNameObj = fileNameObj.getCatalogName();
+ const NAString catalogName = catalogNameObj.getCatalogName();
+ nodeName = (char*) catalogName.data();
+ catStr = nodeName;
+
+ //Measure length of node name
+ //skip over node name i.e. \MAYA, \AZTEC, etc
+ //and get to volume name
+ while((nodeName[nodeNameLen]!='.')&&
+ (nodeNameLen < 8)){
+ catStr++;
+ nodeNameLen++;
+ };
+
+ //get volume/catalog name
+ //skip ".$"
+ catStr=&nodeName[nodeNameLen+2];
#pragma nowarn(1506) // warning elimination
- catStrLen = catalogName.length() - (nodeNameLen+2);
+ catStrLen = catalogName.length() - (nodeNameLen+2);
#pragma warn(1506) // warning elimination
- //get subvolume/schema name
- schemaStr = (char *) schemaName.data();
+ //get subvolume/schema name
+ schemaStr = (char *) schemaName.data();
#pragma nowarn(1506) // warning elimination
- schemaStrLen = schemaName.length();
+ schemaStrLen = schemaName.length();
#pragma warn(1506) // warning elimination
- //get file name
- fileStr = (char *) fileName.data();
+ //get file name
+ fileStr = (char *) fileName.data();
#pragma nowarn(1506) // warning elimination
- fileStrLen = fileName.length();
+ fileStrLen = fileName.length();
#pragma warn(1506) // warning elimination
- //figure out the node number for the node
- //which has the primary partition.
- primaryNodeNum=0;
+ //figure out the node number for the node
+ //which has the primary partition.
+ primaryNodeNum=0;
- primaryNodeNum = gpClusterInfo->mapNodeNameToNodeNum(NAString(nodeName));
+ primaryNodeNum = gpClusterInfo->mapNodeNameToNodeNum(NAString(nodeName));
- }
- }
+ }
+ }
- Lng32 postCreateNATableWarnings = CmpCommon::diags()->getNumber(DgSqlCode::WARNING_);
+ Lng32 postCreateNATableWarnings = CmpCommon::diags()->getNumber(DgSqlCode::WARNING_);
- if(postCreateNATableWarnings != preCreateNATableWarnings)
- tableConstructionHadWarnings_=TRUE;
- const char *lobHdfsServer = CmpCommon::getDefaultString(LOB_HDFS_SERVER);
- Int32 lobHdfsPort = (Lng32)CmpCommon::getDefaultNumeric(LOB_HDFS_PORT);
- if (hasLobColumn())
- {
- // read lob related information from lob metadata
- // setFromStoredDesc(TRUE);
- //
- short *lobNumList = new (heap_) short[getColumnCount()];
- short *lobTypList = new (heap_) short[getColumnCount()];
- char **lobLocList = new (heap_) char*[getColumnCount()];
- char **lobColNameList = new (heap_) char*[getColumnCount()];
- const NAColumnArray &colArray = getNAColumnArray();
- NAColumn *nac = NULL;
-
- Lng32 j = 0;
- for (CollIndex i = 0; i < getColumnCount(); i++)
- {
- nac = colArray.getColumn(i);
-
- if (nac->getType()->getTypeQualifier() == NA_LOB_TYPE)
- {
- lobLocList[j] = new (heap_) char[1024];
- lobColNameList[j] = new (heap_)char[256];
- j++;
- }
- }
-
- NAString schNam;
- schNam = "\"";
- schNam += getTableName().getCatalogName();
- schNam += "\".\"";
- schNam += getTableName().getSchemaName();
- schNam += "\"";
-
- Lng32 numLobs = 0;
- Lng32 cliRC = SQL_EXEC_LOBddlInterface
- (
- (char*)schNam.data(),
- schNam.length(),
- objectUid().castToInt64(),
- numLobs,
- LOB_CLI_SELECT_CURSOR,
- lobNumList,
- lobTypList,
- lobLocList,lobColNameList,(char *)lobHdfsServer,lobHdfsPort,0,FALSE);
-
- if (cliRC == 0)
- {
- for (Lng32 i = 0; i < numLobs; i++)
- {
- nac = colArray.getColumn(lobNumList[i]);
+ if(postCreateNATableWarnings != preCreateNATableWarnings)
+ tableConstructionHadWarnings_=TRUE;
+ const char *lobHdfsServer = CmpCommon::getDefaultString(LOB_HDFS_SERVER);
+ Int32 lobHdfsPort = (Lng32)CmpCommon::getDefaultNumeric(LOB_HDFS_PORT);
+ if (hasLobColumn())
+ {
+ // read lob related information from lob metadata
+ // setFromStoredDesc(TRUE);
+ //
+ short *lobNumList = new (heap_) short[getColumnCount()];
+ short *lobTypList = new (heap_) short[getColumnCount()];
+ char **lobLocList = new (heap_) char*[getColumnCount()];
+ char **lobColNameList = new (heap_) char*[getColumnCount()];
+ const NAColumnArray &colArray = getNAColumnArray();
+ NAColumn *nac = NULL;
+
+ Lng32 j = 0;
+ for (CollIndex i = 0; i < getColumnCount(); i++)
+ {
+ nac = colArray.getColumn(i);
- nac->lobNum() = lobNumList[i];
- nac->lobStorageType() = (LobsStorage)lobTypList[i];
- nac->lobStorageLocation() = lobLocList[i];
- }
- } // if
- } // if
+ if (nac->getType()->getTypeQualifier() == NA_LOB_TYPE)
+ {
+ lobLocList[j] = new (heap_) char[1024];
+ lobColNameList[j] = new (heap_)char[256];
+ j++;
+ }
+ }
+
+ NAString schNam;
+ schNam = "\"";
+ schNam += getTableName().getCatalogName();
+ schNam += "\".\"";
+ schNam += getTableName().getSchemaName();
+ schNam += "\"";
+
+ Lng32 numLobs = 0;
+ Lng32 cliRC = SQL_EXEC_LOBddlInterface
+ (
+ (char*)schNam.data(),
+ schNam.length(),
+ objectUid().castToInt64(),
+ numLobs,
+ LOB_CLI_SELECT_CURSOR,
+ lobNumList,
+ lobTypList,
+ lobLocList,lobColNameList,(char *)lobHdfsServer,lobHdfsPort,0,FALSE);
+
+ if (cliRC == 0)
+ {
+ for (Lng32 i = 0; i < numLobs; i++)
+ {
+ nac = colArray.getColumn(lobNumList[i]);
- // LCOV_EXCL_STOP
- initialSize_ = heap_->getAllocSize();
- MonitorMemoryUsage_Exit((char*)mmPhase.data(), heap_, NULL, TRUE);
+ nac->lobNum() = lobNumList[i];
+ nac->lobStorageType() = (LobsStorage)lobTypList[i];
+ nac->lobStorageLocation() = lobLocList[i];
+ }
+ } // if
+ } // if
+
+ // LCOV_EXCL_STOP
+ initialSize_ = heap_->getAllocSize();
+ MonitorMemoryUsage_Exit((char*)mmPhase.data(), heap_, NULL, TRUE);
} // NATable()
#pragma warn(770) // warning elimination
@@ -5552,810 +5558,852 @@ NATable::NATable(BindWA *bindWA,
const CorrName& corrName,
NAMemory *heap,
struct hive_tbl_desc* htbl)
- //
- // The NATable heap ( i.e. heap_ ) used to come from ContextHeap
- // (i.e. heap) but it creates high memory usage/leakage in Context
- // Heap. Although the NATables are deleted at the end of each statement,
- // the heap_ is returned to heap (i.e. context heap) which caused
- // context heap containing a lot of not used chunk of memory. So it is
- // changed to be from whatever heap is passed in at the call in
- // NATableDB.getNATable.
- //
- // Now NATable objects can be cached.If an object is to be cached (persisted
- // across statements) a NATable heap is allocated for the object
- // and is passed in (this is done in NATableDB::get(CorrName& corrName...).
- // Otherwise a reference to the Statement heap is passed in. When a cached
- // object is to be deleted the object's heap is deleted which wipes out the
- // NATable object all its related stuff. NATable objects that are not cached
- // are wiped out at the end of the statement when the statement heap is deleted.
- //
- : heap_(heap),
- referenceCount_(0),
- refsIncompatibleDP2Halloween_(FALSE),
- isHalloweenTable_(FALSE),
- qualifiedName_(corrName.getExtendedQualNameObj(),heap),
- synonymReferenceName_(heap),
- fileSetName_(corrName.getQualifiedNameObj(),heap), // for now, set equal
- clusteringIndex_(NULL),
- colcount_(0),
- colArray_(heap),
- recordLength_(0),
- indexes_(heap),
- vertParts_(heap),
- colStats_(NULL),
- statsFetched_(FALSE),
- viewFileName_(NULL),
- viewText_(NULL),
- viewTextInNAWchars_(heap),
- viewTextCharSet_(CharInfo::UnknownCharSet),
- viewCheck_(NULL),
- viewColUsages_(NULL),
- flags_(IS_INSERTABLE | IS_UPDATABLE),
- insertMode_(COM_REGULAR_TABLE_INSERT_MODE),
- isSynonymTranslationDone_(FALSE),
- checkConstraints_(heap),
- createTime_(htbl->creationTS_),
- redefTime_(htbl->redeftime()),
- cacheTime_(0),
- statsTime_(0),
- catalogUID_(0),
- schemaUID_(0),
- objectUID_(0),
- objectType_(COM_UNKNOWN_OBJECT),
- partitioningScheme_(COM_UNKNOWN_PARTITIONING),
- uniqueConstraints_(heap),
- refConstraints_(heap),
- isAnMV_(FALSE),
- isAnMVMetaData_(FALSE),
- mvsUsingMe_(heap),
- mvInfo_(NULL),
- accessedInCurrentStatement_(TRUE),
- setupForStatement_(FALSE),
- resetAfterStatement_(FALSE),
- hitCount_(0),
- replacementCounter_(2),
- sizeInCache_(0),
- recentlyUsed_(TRUE),
- tableConstructionHadWarnings_(FALSE),
- isAnMPTableWithAnsiName_(FALSE),
- isUMDTable_(FALSE),
- isSMDTable_(FALSE),
- isMVUMDTable_(FALSE),
-
- // For virtual tables, we set the object schema version
- // to be the current schema version
- osv_(COM_VERS_CURR_SCHEMA),
- ofv_(COM_VERS_CURR_SCHEMA),
- partnsDesc_(NULL),
- colsWithMissingStats_(NULL),
- originalCardinality_(-1.0),
- tableIdList_(heap),
- rcb_(NULL),
- rcbLen_(0),
- keyLength_(0),
- parentTableName_(NULL),
- sgAttributes_(NULL),
- isHive_(TRUE),
- isHbase_(FALSE),
- isHbaseCell_(FALSE),
- isHbaseRow_(FALSE),
- isSeabase_(FALSE),
- isSeabaseMD_(FALSE),
- isSeabasePrivSchemaTable_(FALSE),
- isUserUpdatableSeabaseMD_(FALSE),
- resetHDFSStatsAfterStmt_(FALSE),
- hiveDefaultStringLen_(0),
- hiveTableId_(htbl->tblID_),
- tableDesc_(NULL),
- secKeySet_(heap),
- privInfo_(NULL),
- newColumns_(heap),
- snapshotName_(NULL),
- allColFams_(heap)
+ //
+ // The NATable heap ( i.e. heap_ ) used to come from ContextHeap
+ // (i.e. heap) but it creates high memory usage/leakage in Context
+ // Heap. Although the NATables are deleted at the end of each statement,
+ // the heap_ is returned to heap (i.e. context heap) which caused
+ // context heap containing a lot of not used chunk of memory. So it is
+ // changed to be from whatever heap is passed in at the call in
+ // NATableDB.getNATable.
+ //
+ // Now NATable objects can be cached.If an object is to be cached (persisted
+ // across statements) a NATable heap is allocated for the object
+ // and is passed in (this is done in NATableDB::get(CorrName& corrName...).
+ // Otherwise a reference to the Statement heap is passed in. When a cached
+ // object is to be deleted the object's heap is deleted which wipes out the
+ // NATable object all its related stuff. NATable objects that are not cached
+ // are wiped out at the end of the statement when the statement heap is deleted.
+ //
+ : heap_(heap),
+ referenceCount_(0),
+ refsIncompatibleDP2Halloween_(FALSE),
+ isHalloweenTable_(FALSE),
+ qualifiedName_(corrName.getExtendedQualNameObj(),heap),
+ synonymReferenceName_(heap),
+ fileSetName_(corrName.getQualifiedNameObj(),heap), // for now, set equal
+ clusteringIndex_(NULL),
+ colcount_(0),
+ colArray_(heap),
+ recordLength_(0),
+ indexes_(heap),
+ vertParts_(heap),
+ colStats_(NULL),
+ statsFetched_(FALSE),
+ viewFileName_(NULL),
+ viewText_(NULL),
+ viewTextInNAWchars_(heap),
+ viewTextCharSet_(CharInfo::UnknownCharSet),
+ viewCheck_(NULL),
+ viewColUsages_(NULL),
+ hiveOrigViewText_(NULL),
+ flags_(IS_INSERTABLE | IS_UPDATABLE),
+ insertMode_(COM_REGULAR_TABLE_INSERT_MODE),
+ isSynonymTranslationDone_(FALSE),
+ checkConstraints_(heap),
+ createTime_(htbl->creationTS_),
+ redefTime_(htbl->redeftime()),
+ cacheTime_(0),
+ statsTime_(0),
+ catalogUID_(0),
+ schemaUID_(0),
+ objectUID_(0),
+ objectType_(COM_UNKNOWN_OBJECT),
+ partitioningScheme_(COM_UNKNOWN_PARTITIONING),
+ uniqueConstraints_(heap),
+ refConstraints_(heap),
+ isAnMV_(FALSE),
+ isAnMVMetaData_(FALSE),
+ mvsUsingMe_(heap),
+ mvInfo_(NULL),
+ accessedInCurrentStatement_(TRUE),
+ setupForStatement_(FALSE),
+ resetAfterStatement_(FALSE),
+ hitCount_(0),
+ replacementCounter_(2),
+ sizeInCache_(0),
+ recentlyUsed_(TRUE),
+ tableConstructionHadWarnings_(FALSE),
+ isAnMPTableWithAnsiName_(FALSE),
+ isUMDTable_(FALSE),
+ isSMDTable_(FALSE),
+ isMVUMDTable_(FALSE),
+
+ // For virtual tables, we set the object schema version
+ // to be the current schema version
+ osv_(COM_VERS_CURR_SCHEMA),
+ ofv_(COM_VERS_CURR_SCHEMA),
+ partnsDesc_(NULL),
+ colsWithMissingStats_(NULL),
+ originalCardinality_(-1.0),
+ tableIdList_(heap),
+ rcb_(NULL),
+ rcbLen_(0),
+ keyLength_(0),
+ parentTableName_(NULL),
+ sgAttributes_(NULL),
+ isHive_(TRUE),
+ isHbase_(FALSE),
+ isHbaseCell_(FALSE),
+ isHbaseRow_(FALSE),
+ isSeabase_(FALSE),
+ isSeabaseMD_(FALSE),
+ isSeabasePrivSchemaTable_(FALSE),
+ isUserUpdatableSeabaseMD_(FALSE),
+ resetHDFSStatsAfterStmt_(FALSE),
+ hiveDefaultStringLen_(0),
+ hiveTableId_(htbl->tblID_),
+ tableDesc_(NULL),
+ secKeySet_(heap),
+ privInfo_(NULL),
+ newColumns_(heap),
+ snapshotName_(NULL),
+ allColFams_(heap)
{
- NAString tblName = qualifiedName_.getQualifiedNameObj().getQualifiedNameAsString();
- NAString mmPhase;
-
- Lng32 preCreateNATableWarnings = CmpCommon::diags()->getNumber(DgSqlCode::WARNING_);
-
- //set heap type
- if(heap_ == CmpCommon::statementHeap()){
- heapType_ = STATEMENT;
- mmPhase = "NATable Init (Stmt) - " + tblName;
- }else if (heap_ == CmpCommon::contextHeap()){
- heapType_ = CONTEXT;
- mmPhase = "NATable Init (Cnxt) - " + tblName;
- }else {
- heapType_ = OTHER;
- mmPhase = "NATable Init (Other) - " + tblName;
- }
+ NAString tblName = qualifiedName_.getQualifiedNameObj().getQualifiedNameAsString();
+ NAString mmPhase;
+
+ Lng32 preCreateNATableWarnings = CmpCommon::diags()->getNumber(DgSqlCode::WARNING_);
+
+ //set heap type
+ if(heap_ == CmpCommon::statementHeap()){
+ heapType_ = STATEMENT;
+ mmPhase = "NATable Init (Stmt) - " + tblName;
+ }else if (heap_ == CmpCommon::contextHeap()){
+ heapType_ = CONTEXT;
+ mmPhase = "NATable Init (Cnxt) - " + tblName;
+ }else {
+ heapType_ = OTHER;
+ mmPhase = "NATable Init (Other) - " + tblName;
+ }
- MonitorMemoryUsage_Enter((char*)mmPhase.data(), heap_, TRUE);
+ MonitorMemoryUsage_Enter((char*)mmPhase.data(), heap_, TRUE);
- isTrigTempTable_ = FALSE;
+ isTrigTempTable_ = FALSE;
- insertMode_ =
- COM_MULTISET_TABLE_INSERT_MODE; // allow dup, to check
- //ComInsertMode::COM_MULTISET_TABLE_INSERT_MODE; // allow dup, to check
+ insertMode_ =
+ COM_MULTISET_TABLE_INSERT_MODE; // allow dup, to check
+ //ComInsertMode::COM_MULTISET_TABLE_INSERT_MODE; // allow dup, to check
- //
- // Add timestamp information.
- //
+ //
+ // Add timestamp information.
+ //
- // To get from Hive
- /*
- createTime_ = longArrayToInt64(table_desc->tableDesc()->createtime);
- redefTime_ = longArrayToInt64(table_desc->tableDesc()->redeftime);
- cacheTime_ = longArrayToInt64(table_desc->tableDesc()->cachetime);
- */
+ // To get from Hive
+ /*
+ createTime_ = longArrayToInt64(table_desc->tableDesc()->createtime);
+ redefTime_ = longArrayToInt64(table_desc->tableDesc()->redeftime);
+ cacheTime_ = longArrayToInt64(table_desc->tableDesc()->cachetime);
+ */
+
+ // NATable has a schemaUID column, probably should propogate it.
+ // for now, set to 0.
+ schemaUID_ = 0;
+
+ // Set the objectUID_
+ // If the HIVE table has been registered in Trafodion, get the objectUID
+ // from Trafodion, otherwise, set it to 0.
+ // TBD - does getQualifiedNameObj handle delimited names correctly?
+ if ( !fetchObjectUIDForNativeTable(corrName) )
+ return;
- // NATable has a schemaUID column, probably should propogate it.
- // for now, set to 0.
- schemaUID_ = 0;
+ if ( objectUID_ > 0 )
+ setHasExternalTable(TRUE);
- // Set the objectUID_
- // If the HIVE table has been registered in Trafodion, get the objectUID
- // from Trafodion, otherwise, set it to 0.
- // TBD - does getQualifiedNameObj handle delimited names correctly?
- if ( !fetchObjectUIDForNativeTable(corrName) )
- return;
+ // for HIVE objects, the schema owner and table owner is HIVE_ROLE_ID
+ if (CmpCommon::context()->isAuthorizationEnabled())
+ {
+ owner_ = HIVE_ROLE_ID;
+ schemaOwner_ = HIVE_ROLE_ID;
+ }
+ else
+ {
+ owner_ = SUPER_USER;
+ schemaOwner_ = SUPER_USER;
+ }
- if ( objectUID_ > 0 )
- setHasExternalTable(TRUE);
+ getPrivileges(NULL);
- // for HIVE objects, the schema owner and table owner is HIVE_ROLE_ID
- if (CmpCommon::context()->isAuthorizationEnabled())
- {
- owner_ = HIVE_ROLE_ID;
- schemaOwner_ = HIVE_ROLE_ID;
- }
- else
- {
- owner_ = SUPER_USER;
- schemaOwner_ = SUPER_USER;
- }
+ // TBD - if authorization is enabled and there is no external table to store
+ // privileges, go get privilege information from HIVE metadata ...
- getPrivileges(NULL);
+ // TBD - add a check to verify that the column list coming from HIVE matches
+ // the column list stored in the external table. Maybe some common method
+ // that can be used to compare other things as well...
- // TBD - if authorization is enabled and there is no external table to store
- // privileges, go get privilege information from HIVE metadata ...
+ objectType_ = COM_BASE_TABLE_OBJECT;
- // TBD - add a check to verify that the column list coming from HIVE matches
- // the column list stored in the external table. Maybe some common method
- // that can be used to compare other things as well...
+ // to check
+ partitioningScheme_ = COM_UNKNOWN_PARTITIONING;
- objectType_ = COM_BASE_TABLE_OBJECT;
+ // to check
+ rcb_ = 0;
+ rcbLen_ = 0;
+ keyLength_ = 0;
- // to check
- partitioningScheme_ = COM_UNKNOWN_PARTITIONING;
+ partnsDesc_ = NULL;
- // to check
- rcb_ = 0;
- rcbLen_ = 0;
- keyLength_ = 0;
+ //
+ // Insert a NAColumn in the colArray_ for this NATable for each
+ // columns_desc from the ARK SMD. Returns TRUE if error creating NAColumns.
+ //
- partnsDesc_ = NULL;
+ if (createNAColumns(htbl->getColumns(),
+ this,
+ colArray_ /*OUT*/,
+ heap_))
+ //coverity[leaked_storage]
+ return;
- //
- // Insert a NAColumn in the colArray_ for this NATable for each
- // columns_desc from the ARK SMD. Returns TRUE if error creating NAColumns.
- //
- if (createNAColumns(htbl->getColumns(),
- this,
- colArray_ /*OUT*/,
- heap_))
- //coverity[leaked_storage]
- return;
+ //
+ // Set colcount_ after all possible errors (Binder uses nonzero colcount
+ // as an indicator of valid table definition).
+ //
+ // To set it via the new createNAColumns()
+ colcount_ = colArray_.entries();
- //
- // Set colcount_ after all possible errors (Binder uses nonzero colcount
- // as an indicator of valid table definition).
- //
+ // compute record length from colArray
- // To set it via the new createNAColumns()
- colcount_ = colArray_.entries();
+ Int32 recLen = 0;
+ for ( CollIndex i=0; i<colcount_; i++ ) {
+ recLen += colArray_[i]->getType()->getNominalSize();
+ }
- // compute record length from colArray
+ setRecordLength(recLen);
- Int32 recLen = 0;
- for ( CollIndex i=0; i<colcount_; i++ ) {
- recLen += colArray_[i]->getType()->getNominalSize();
- }
+ //
+ // Add view information, if this is a native hive view
+ //
+ if (htbl->isView())
+ {
+ NAString viewExpandedText(htbl->viewExpandedText_);
- setRecordLength(recLen);
+ // expanded hive view text quotes table and column names with
+ // back single quote (`). It also refers to default hive schema
+ // as `default`.
+ // Convert hive text to traf format.
+ // hive "default" schema is referred as "hive" in traf.
+ // replace `default` with hive and replace ` with "
- if (createNAFileSets(htbl /*IN*/,
- this /*IN*/,
- colArray_ /*IN*/,
- indexes_ /*OUT*/,
- vertParts_ /*OUT*/,
- clusteringIndex_ /*OUT*/,
- tableIdList_ /*OUT*/,
- heap_,
- bindWA
- )) {
- colcount_ = 0; // indicates failure
- return;
- }
+ // replace `default` with "hive"
+ viewExpandedText = replaceAll(viewExpandedText, "`default`", "hive");
- // HIVE-TBD ignore constraint info creation for now
+ // replace ` with "
+ viewExpandedText = replaceAll(viewExpandedText, "`", "");
+
+ NAString createViewStmt("CREATE VIEW ");
+ createViewStmt += htbl->tblName_ + NAString(" AS ") +
+ viewExpandedText + NAString(";");
+
+ Lng32 viewTextLen = createViewStmt.length();
+ viewText_ = new (heap_) char[viewTextLen+ 2];
+ strcpy(viewText_, createViewStmt.data());
+ hiveOrigViewText_ = new (heap_) char[strlen(htbl->viewOriginalText_)+2];
+ strcpy(hiveOrigViewText_, htbl->viewOriginalText_);
+ viewTextCharSet_ = CharInfo::UTF8;
+
+ viewFileName_ = NULL;
+ UInt32 viewFileNameLength = str_len(htbl->tblName_) + 1;
+ viewFileName_ = new (heap_) char[viewFileNameLength];
+ memcpy(viewFileName_, htbl->tblName_, viewFileNameLength);
+
+ setUpdatable(FALSE);
+ setInsertable(FALSE);
+ }
+ else
+ {
+ if (createNAFileSets(htbl /*IN*/,
+ this /*IN*/,
+ colArray_ /*IN*/,
+ indexes_ /*OUT*/,
+ vertParts_ /*OUT*/,
+ clusteringIndex_ /*OUT*/,
+ tableIdList_ /*OUT*/,
+ heap_,
+ bindWA
+ )) {
+ colcount_ = 0; // indicates failure
+ return;
+ }
+ }
+
+ // HIVE-TBD ignore constraint info creation for now
- // If there is a host variable associated with this table, store it
- // for use by the generator to generate late-name resolution information.
- //
- HostVar *hv = corrName.getPrototype();
- prototype_ = hv ? new (heap_) HostVar(*hv) : NULL;
- // MV
- // Initialize the MV support data members
- isAnMV_ = FALSE;
- isAnMVMetaData_ = FALSE;
+ // If there is a host variable associated with this table, store it
+ // for use by the generator to generate late-name resolution information.
+ //
+ HostVar *hv = corrName.getPrototype();
+ prototype_ = hv ? new (heap_) HostVar(*hv) : NULL;
- Lng32 postCreateNATableWarnings = CmpCommon::diags()->getNumber(DgSqlCode::WARNING_);
+ // MV
+ // Initialize the MV support data members
+ isAnMV_ = FALSE;
+ isAnMVMetaData_ = FALSE;
+
+ Lng32 postCreateNATableWarnings = CmpCommon::diags()->getNumber(DgSqlCode::WARNING_);
- if(postCreateNATableWarnings != preCreateNATableWarnings)
- tableConstructionHadWarnings_=TRUE;
+ if(postCreateNATableWarnings != preCreateNATableWarnings)
+ tableConstructionHadWarnings_=TRUE;
- hiveDefaultStringLen_ = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH);
- Int32 hiveDefaultStringLenInBytes = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH_IN_BYTES);
- if( hiveDefaultStringLenInBytes != 32000 )
- hiveDefaultStringLen_ = hiveDefaultStringLenInBytes;
+ hiveDefaultStringLen_ = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH);
+ Int32 hiveDefaultStringLenInBytes = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH_IN_BYTES);
+ if( hiveDefaultStringLenInBytes != 32000 )
+ hiveDefaultStringLen_ = hiveDefaultStringLenInBytes;
- // LCOV_EXCL_STOP
- initialSize_ = heap_->getAllocSize();
- MonitorMemoryUsage_Exit((char*)mmPhase.data(), heap_, NULL, TRUE);
+ // LCOV_EXCL_STOP
+ initialSize_ = heap_->getAllocSize();
+ MonitorMemoryUsage_Exit((char*)mmPhase.data(), heap_, NULL, TRUE);
} // NATable()
#pragma warn(770) // warning elimination
+NABoolean NATable::doesMissingStatsWarningExist(CollIndexSet & colsSet) const
+{
+ return colsWithMissingStats_->contains(&colsSet);
+}
+
+NABoolean NATable::insertMissingStatsWarning(CollIndexSet colsSet) const
+{
+ CollIndexSet * setOfColsWithMissingStats = new (STMTHEAP) CollIndexSet (colsSet);
+
+ Int32 someVar = 1;
+ CollIndexSet * result = colsWithMissingStats_->insert(setOfColsWithMissingStats, &someVar);
+
+ if (result == NULL)
+ return FALSE;
+ else
+ return TRUE;
+}
+
+// This gets called in the Optimizer phase -- the Binder phase will already have
+// marked columns that were referenced in the query, so that the ustat function
+// below can decide which histograms and histints to leave in the stats list
+// and which to remove.
+//
+StatsList &
+NATable::getStatistics()
+{
+ if (!statsFetched_)
+ {
+ // mark the kind of histograms needed for this table's columns
+ markColumnsForHistograms();
+
+ NAString tblName = qualifiedName_.getQualifiedNameObj().getQualifiedNameAsString();
+ NAString mmPhase = "NATable getStats - " + tblName;
+ MonitorMemoryUsage_Enter((char*)mmPhase.data(), NULL, TRUE);
+
+ //trying to get statistics for a new statement allocate colStats_
+ colStats_ = new (CmpCommon::statementHeap()) StatsList(CmpCommon::statementHeap());
+
+ // Do not create statistics on the fly for the following tables
+ if (isAnMV() || isUMDTable() ||
+ isSMDTable() || isMVUMDTable() ||
+ isTrigTempTable() )
+ CURRSTMT_OPTDEFAULTS->setHistDefaultSampleSize(0);
+
+ CURRCONTEXT_HISTCACHE->getHistograms(*this);
+
+ if ((*colStats_).entries() > 0)
+ originalCardinality_ = (*colStats_)[0]->getRowcount();
+ else
+ originalCardinality_ = ActiveSchemaDB()->getDefaults().getAsDouble(HIST_NO_STATS_ROWCOUNT);
+
+ // -----------------------------------------------------------------------
+ // So now we have read in the contents of the HISTOGRM & HISTINTS
+ // tables from the system catalog. Before we can use them, we need
+ // to massage them into a format we can use. In particular, we need
+ // to make sure that what we read in (which the user may have mucked
+ // about with) matches the histogram classes' internal semantic
+ // requirements. Also, we need to generate the MultiColumnUecList.
+ // ----------------------------------------------------------------------
+
+ // what did the user set as the max number of intervals?
+ NADefaults &defs = ActiveSchemaDB()->getDefaults();
+ CollIndex maxIntervalCount = defs.getAsLong(HIST_MAX_NUMBER_OF_INTERVALS);
+
+ //-----------------------------------------------------------------------------------
+ // Need to flag the MC colStatsDesc so it is only used for the range partitioning task
+ // and not any cardinality calculations tasks. Flagging it also makes the logic
+ // to check fo the presence for this MC easier (at the time we need to create
+ // the range partitioning function)
+ //-----------------------------------------------------------------------------------
+
+ if (CmpCommon::getDefault(HBASE_RANGE_PARTITIONING_MC_SPLIT) == DF_ON &&
+ !(*colStats_).allFakeStats())
+ {
+ CollIndex currentMaxsize = 1;
+ Int32 posMCtoUse = -1;
+
+ NAColumnArray partCols;
+
+ if (getClusteringIndex()->getPartitioningKeyColumns().entries() > 0)
+ partCols = getClusteringIndex()->getPartitioningKeyColumns();
+ else
+ partCols = getClusteringIndex()->getIndexKeyColumns();
+
+ CollIndex partColNum = partCols.entries();
+
+ // look for MC histograms that have multiple intervals and whose columns are a prefix for the
+ // paritition column list. If multiple pick the one with the most matching columns
+ for (Int32 i=0; i < (*colStats_).entries(); i++)
+ {
+ NAColumnArray statsCols = (*colStats_)[i]->getStatColumns();
+ CollIndex colNum = statsCols.entries();
+
+ CollIndex j = 0;
+
+ NABoolean potentialMatch = TRUE;
+ if ((colNum > currentMaxsize) &&
+ (!(*colStats_)[i]->isSingleIntHist()) && // no SIH -- number of histograms is large enough to do splitting
+ (colNum <= partColNum))
+ {
+ while ((j < colNum) && potentialMatch)
+ {
+ j++;
+ NAColumn * col = partCols[j-1];
+ if (statsCols[j-1]->getPosition() != partCols[j-1]->getPosition())
+ {
+ potentialMatch = FALSE;
+ break;
+ }
+ }
+ }
+ else
+ {
+ potentialMatch = FALSE;
+ }
+
+ if (potentialMatch)
+ {
+ currentMaxsize = j;
+ posMCtoUse = i;
+ }
+
+ // we got what we need, just return
+ if (potentialMatch && (currentMaxsize == partColNum))
+ {
+ break;
+ }
+ }
+
+ if (posMCtoUse >= 0)
+ {
+ (*colStats_)[posMCtoUse]->setMCforHbasePartitioning (TRUE);
+ }
+ }
+
+ // *************************************************************************
+ // FIRST: Generate the stats necessary to later create the
+ // MultiColumnUecList; then filter out the multi-column histograms
+ // because later code doesn't know how to handle them
+ // In the same loop, also mark another flag for originally fake histogram
+ // This is to differentiate the cases when the histogram is fake because
+ // it has no statistics and the case where the histogram has been termed
+ // fake by the optimizer because its statistics is no longer reliable.
+ // *************************************************************************
+ CollIndex i ;
+ for ( i = 0 ; i < (*colStats_).entries() ; /* no automatic increment */ )
+ {
+ // the StatsList has two lists which it uses to store the information we
+ // need to fill the MultiColumnUecList with <table-col-list,uec value> pairs:
+ //
+ // LIST(NAColumnArray) groupUecColumns_
+ // LIST(CostScalar) groupUecValues_
+ //
+ // ==> insert the NAColumnArray & uec total values for each
+ // entry in colStats_
+
+ // don't bother storing multicolumnuec info for fake histograms
+ // but do set the originallly fake histogram flag to TRUE
+ if ( (*colStats_)[i]->isFakeHistogram() )
+ (*colStats_)[i]->setOrigFakeHist(TRUE);
+ else
+ {
+ NAColumnArray cols = (*colStats_)[i]->getStatColumns() ;
+ (*colStats_).groupUecColumns_.insert(cols) ;
+
+ CostScalar uecs = (*colStats_)[i]->getTotalUec() ;
+ (*colStats_).groupUecValues_.insert(uecs) ;
+
+ if (CmpCommon::getDefault(USTAT_COLLECT_MC_SKEW_VALUES) == DF_ON)
+ {
+ MCSkewedValueList mcSkewedValueList = (*colStats_)[i]->getMCSkewedValueList() ;
+ (*colStats_).groupMCSkewedValueLists_.insert(mcSkewedValueList) ;
+ }
+ }
+
+ // MCH:
+ // once we've stored the column/uec information, filter out the
+ // multi-column histograms, since our synthesis code doesn't
+ // handle them
+ if (( (*colStats_)[i]->getStatColumns().entries() != 1) &&
+ (!(*colStats_)[i]->isMCforHbasePartitioning()))
+ {
+ (*colStats_).removeAt(i) ;
+ }
+ else
+ {
+ i++ ; // in-place removal from a list is a bother!
+ }
+ }
+
+ // *************************************************************************
+ // SECOND: do some fixup work to make sure the histograms maintain
+ // the semantics we later expect (& enforce)
+ // *************************************************************************
+
+ // -------------------------------------------------------------------------
+ // HISTINT fixup-code : char-string histograms
+ // -------------------------------------------------------------------------
+ // problem arises with HISTINTs that are for char* columns
+ // here's what we can get:
+ //
+ // Rows Uec Value
+ // ---- --- -----
+ // 0 0 "value"
+ // 10 5 "value"
+ //
+ // this is not good! The problem is our (lousy) encoding of
+ // char strings into EncodedValue's
+ //
+ // After much deliberation, here's our current fix:
+ //
+ // Rows Uec Value
+ // ---- --- -----
+ // 0 0 "valu" <-- reduce the min value of 1st interval
+ // 10 5 "value" by a little bit
+ //
+ // When we find two intervals like this where they aren't the
+ // first intervals in the histogram, we simply merge them into
+ // one interval (adding row/uec information) and continue; note
+ // that in this case, we haven't actually lost any information;
+ // we've merely made sense out of (the garbage) what we've got
+ //
+ // -------------------------------------------------------------------------
+ // additional HISTINT fixup-code
+ // -------------------------------------------------------------------------
+ // 1. If there are zero or one HISTINTs, then set the HISTINTs to match
+ // the max/min information contained in the COLSTATS object.
+ //
+ // 2. If there are any HISTINTs whose boundary values are out-of-order,
+ // we abort with an ERROR message.
+ //
+ // 3. If there is a NULL HISTINT at the end of the Histogram, then we
+ // need to make sure there are *TWO* NULL HISTINTS, to preserve correct
+ // histogram semantics for single-valued intervals.
+ // -------------------------------------------------------------------------
+
+ CollIndex j ;
+ for ( i = 0 ; i < (*colStats_).entries() ; i++ )
+ {
+ // we only worry about histograms on char string columns
+ // correction: it turns out that these semantically-deranged
+ // ---------- histograms were being formed for other, non-char string
+ // columns, so we commented out the code below
+ // if ( colStats_[i]->getStatColumns()[0]->getType()->getTypeQualifier() !=
+ // NA_CHARACTER_TYPE)
+ // continue ; // not a string, skip to next
+
+ ColStatsSharedPtr stats = (*colStats_)[i] ;
+
+ HistogramSharedPtr hist = stats->getHistogramToModify() ;
+ // histograms for key columns of a table that are not
+ // referenced in the query are read in with zero intervals
+ // (to conserve memory); however, internal
+ // histogram-semantic checking code assumes that any
+ // histogram which has zero intervals is FAKE; however,
+ // MDAM will not be chosen in the case where one of
+ // the histograms for a key column is FAKE. Thus -- we will
+ // avoid this entire issue by creating a single interval for
+ // any Histograms that we read in that are empty.
+ if ( hist->entries() < 2 )
+ {
+ if(stats->getMinValue() > stats->getMaxValue())
+ {
+ *CmpCommon::diags() << DgSqlCode(CATALOG_HISTOGRM_HISTINTS_TABLES_CONTAIN_BAD_VALUE)
+ << DgString0("")
+ << DgString1(stats->getStatColumns()[0]->getFullColRefNameAsAnsiString().data() );
+
+ stats->createFakeHist();
+ continue;
+ }
+
+ stats->setToSingleInterval ( stats->getMinValue(),
+ stats->getMaxValue(),
+ stats->getRowcount(),
+ stats->getTotalUec() ) ;
+ // now we have to undo some of the automatic flag-setting
+ // of ColStats::setToSingleInterval()
+ stats->setMinSetByPred (FALSE) ;
+ stats->setMaxSetByPred (FALSE) ;
+ stats->setShapeChanged (FALSE) ;
+ continue ; // skip to next ColStats
+ }
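
When a histogram comes back with fewer than two HistInts, the code above rebuilds it as one interval spanning [min, max] and then clears the flags that setToSingleInterval() sets as a side effect. A rough sketch of the resulting shape, with assumed simplified types (the real HistInt / ColStats interfaces are not reproduced here):

    #include <vector>

    struct IntervalStub {
      double boundary;   // upper boundary of the interval
      double rows;       // rows falling in the interval
      double uec;        // unique entry count of the interval
    };

    // An empty histogram read from metadata becomes a single interval that
    // carries the aggregate row count and UEC, so later semantic checks
    // do not treat it as FAKE.
    static std::vector<IntervalStub>
    toSingleInterval(double minVal, double maxVal, double rows, double uec)
    {
      // the first HistInt only marks the lower edge (zero rows / zero uec),
      // the second carries all rows and UECs up to the max value
      return { {minVal, 0.0, 0.0}, {maxVal, rows, uec} };
    }
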
+
+ // NB: we'll handle the first Interval last
+ for ( j = 1 ; j < hist->entries()-1 ; /* no automatic increment */ )
+ {
+
+ if ( (*hist)[j].getUec() == 0 || (*hist)[j].getCardinality() == 0 )
+ {
+ hist->removeAt(j) ;
+ continue ; // don't increment, loop again
+ }
+ // intervals must be in order!
+ if ( (*hist)[j].getBoundary() > (*hist)[j+1].getBoundary() )
+ {
+ *CmpCommon::diags() <<
+ DgSqlCode(CATALOG_HISTINTS_TABLES_CONTAIN_BAD_VALUES)
+ << DgInt0(j)
+ << DgInt1(j+1)
+ << DgString1(stats->getStatColumns()[0]->getFullColRefNameAsAnsiString().data() );
+
+ stats->createFakeHist();
+ break ; // skip to next ColStats
+ }
-NABoolean NATable::doesMissingStatsWarningExist(CollIndexSet & colsSet) const
-{
- return colsWithMissingStats_->contains(&colsSet);
-}
+ if ( (*hist)[j].getBoundary() == (*hist)[j+1].getBoundary() )
+ {
+ // merge Intervals, if the two consecutive intervals have the same
+ // boundaries and these are not single valued (UEC > 1).
+ // If there are more than two single valued intervals, then merge
+ // all except the last one.
+ NABoolean mergeIntervals = FALSE;
-NABoolean NATable::insertMissingStatsWarning(CollIndexSet colsSet) const
-{
- CollIndexSet * setOfColsWithMissingStats = new (STMTHEAP) CollIndexSet (colsSet);
+ if (CmpCommon::getDefault(COMP_BOOL_79) == DF_ON)
+ {
+ mergeIntervals = TRUE;
- Int32 someVar = 1;
- CollIndexSet * result = colsWithMissingStats_->insert(setOfColsWithMissingStats, &someVar);
+ if( (j < (hist->entries() - 2)) && ((*hist)[j+1].getUec() == 1) &&
+ ((*hist)[j+1].getBoundary() != (*hist)[j+2].getBoundary())
+ ||
+ (j == (hist->entries() - 2)) && ((*hist)[j+1].getUec() == 1) )
+ mergeIntervals = FALSE;
+ }
+ else
+ {
+ if ( (*hist)[j+1].getUec() > 1)
+ mergeIntervals = TRUE;
+ }
- if (result == NULL)
- return FALSE;
- else
- return TRUE;
-}
+ if ( mergeIntervals )
+ {
+ // if the intervals with same boundary are not SVI, just merge them
+ // together.
+ // Also do the merge, if there is more than one SVI with the same
+ // encoded interval boundary. For example, we want to avoid intervals
+ // such as
+ // boundary inclusive_flag UEC
+ // 12345.00 < 1
+ // 12345.00 < 1
+ // 12345.00 <= 1
+ // These would be changed to
+ // 12345.00 < 2
+ // 12345.00 <= 1
+ CostScalar combinedRows = (*hist)[ j ].getCardinality() +
+ (*hist)[j+1].getCardinality() ;
+ CostScalar combinedUec = (*hist)[ j ].getUec() +
+ (*hist)[j+1].getUec() ;
+ (*hist)[j].setCardAndUec (combinedRows, combinedUec) ;
+ stats->setIsColWithBndryConflict(TRUE);
+ hist->removeAt(j+1) ;
+ }
+ else
+ {
+ // for some reason, some SVI's aren't being
+ // generated correctly!
+ (*hist)[j].setBoundIncl(FALSE) ;
+ (*hist)[j+1].setBoundIncl(TRUE) ;
+ j++;
+ }
+ }
+ else
+ j++ ; // in-place removal from a list is a bother!
+ } // loop over intervals
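
The merge branch above folds two intervals that end on the same boundary into one, summing rows and UEC, exactly as in the 12345.00 example in the comment: (12345.00, <, 1) + (12345.00, <, 1) becomes (12345.00, <, 2) while the trailing (12345.00, <=, 1) single-valued interval is preserved. A hedged sketch with simplified stand-in types:

    #include <vector>

    struct HistIntStub {
      double boundary;
      bool   boundIncl;
      double rows;
      double uec;
    };

    // Fold interval j+1 into interval j when both end on the same boundary,
    // combining their row counts and UECs; interval j now represents both.
    static void mergeEqualBoundary(std::vector<HistIntStub> &h, size_t j)
    {
      h[j].rows += h[j + 1].rows;
      h[j].uec  += h[j + 1].uec;
      h.erase(h.begin() + j + 1);
    }
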
-// This gets called in the Optimizer phase -- the Binder phase will already have
-// marked columns that were referenced in the query, so that the ustat function
-// below can decide which histograms and histints to leave in the stats list
-// and which to remove.
-//
- StatsList &
-NATable::getStatistics()
-{
- if (!statsFetched_)
- {
- // mark the kind of histograms needed for this table's columns
- markColumnsForHistograms();
-
- NAString tblName = qualifiedName_.getQualifiedNameObj().getQualifiedNameAsString();
- NAString mmPhase = "NATable getStats - " + tblName;
- MonitorMemoryUsage_Enter((char*)mmPhase.data(), NULL, TRUE);
-
- //trying to get statistics for a new statement allocate colStats_
- colStats_ = new (CmpCommon::statementHeap()) StatsList(CmpCommon::statementHeap());
-
- // Do not create statistics on the fly for the following tables
- if (isAnMV() || isUMDTable() ||
- isSMDTable() || isMVUMDTable() ||
- isTrigTempTable() )
- CURRSTMT_OPTDEFAULTS->setHistDefaultSampleSize(0);
-
- CURRCONTEXT_HISTCACHE->getHistograms(*this);
-
- if ((*colStats_).entries() > 0)
- originalCardinality_ = (*colStats_)[0]->getRowcount();
- else
- originalCardinality_ = ActiveSchemaDB()->getDefaults().getAsDouble(HIST_NO_STATS_ROWCOUNT);
-
- // -----------------------------------------------------------------------
- // So now we have read in the contents of the HISTOGRM & HISTINTS
- // tables from the system catalog. Before we can use them, we need
- // to massage them into a format we can use. In particular, we need
- // to make sure that what we read in (which the user may have mucked
- // about with) matches the histogram classes' internal semantic
- // requirements. Also, we need to generate the MultiColumnUecList.
- // ----------------------------------------------------------------------
-
- // what did the user set as the max number of intervals?
- NADefaults &defs = ActiveSchemaDB()->getDefaults();
- CollIndex maxIntervalCount = defs.getAsLong(HIST_MAX_NUMBER_OF_INTERVALS);
-
- //-----------------------------------------------------------------------------------
- // Need to flag the MC colStatsDesc so it is only used for the range partitioning task
- // and not any cardinality calculation tasks. Flagging it also makes the logic
- // to check for the presence of this MC easier (at the time we need to create
- // the range partitioning function)
- //-----------------------------------------------------------------------------------
-
- if (CmpCommon::getDefault(HBASE_RANGE_PARTITIONING_MC_SPLIT) == DF_ON &&
- !(*colStats_).allFakeStats())
- {
- CollIndex currentMaxsize = 1;
- Int32 posMCtoUse = -1;
+ // ----------------------------------------------------------------------
+ // now we handle the first interval
+ //
+ // first, it must be in order w.r.t. the second interval!
+ if ( (*hist)[0].getBoundary() > (*hist)[1].getBoundary() )
+ {
+ *CmpCommon::diags() <<
+ DgSqlCode(CATALOG_HISTINTS_TABLES_CONTAIN_BAD_VALUES)
+ << DgInt0(0)
+ << DgInt1(1)
+ << DgString1(stats->getStatColumns()[0]->getFullColRefNameAsAnsiString().data() );
+
+ stats->createFakeHist();
+ continue ; // skip to next ColStats
+ }
- NAColumnArray partCols;
+ // second, handle the case where first and second interval are the same
+ if ( hist->entries() > 1 && // avoid the exception! might just be a single NULL
+ // // interval after the loop above
+ (*hist)[0].getBoundary() == (*hist)[1].getBoundary() &&
+ (*hist)[1].getUec() > 1 )
+ {
+ const double KLUDGE_VALUE = 0.0001 ;
+ const double oldVal = (*hist)[0].getBoundary().getDblValue() ;
+ const EncodedValue newVal =
+ EncodedValue(oldVal - (_ABSOLUTE_VALUE_(oldVal) * KLUDGE_VALUE)) ; // kludge alert!
+ //Absolute of oldval due to CR 10-010426-2457
+ (*hist)[0].setBoundary( newVal ) ;
+ (*hist)[0].setBoundIncl( FALSE ) ; // no longer a real boundary!
+ (*colStats_)[i]->setMinValue( newVal ) ; // set aggr info also
+ }
+ // done with first interval
+ // ----------------------------------------------------------------------
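
The adjustment just above nudges the first boundary down by a relative epsilon so the first two intervals no longer share a boundary; taking the absolute value keeps the result strictly smaller for negative boundaries too. A sketch of the arithmetic outside the EncodedValue wrapper (the constant matches the code above; the helper name is illustrative):

    #include <cmath>

    static double nudgeMinBoundary(double oldVal)
    {
      const double KLUDGE_VALUE = 0.0001;   // same constant as in the code above
      // e.g. 100.0 -> 99.99, and -100.0 -> -100.01 (still strictly smaller)
      return oldVal - std::fabs(oldVal) * KLUDGE_VALUE;
    }
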
- if (getClusteringIndex()->getPartitioningKeyColumns().entries() > 0)
- partCols = getClusteringIndex()->getPartitioningKeyColumns();
- else
- partCols = getClusteringIndex()->getIndexKeyColumns();
+ //
+ // NULL values must only be stored in single-valued intervals
+ // in the histograms ; so, just in case we're only getting
+ // *one* HistInt for the NULL interval, insert a 2nd one
+ //
+ // 0 1 2
+ // | | |
+ // | | | entries() == 3
+ // NULL
+ //
+ // 0 1 2 3
+ // | | | |
+ // | | | | entries() == 4
+ // new NULL
+ // NULL
+ //
+ if ( hist->lastHistInt().isNull() )
+ {
+ CollIndex count = hist->entries() ;
+ if ( !(*hist)[count-2].isNull() )
+ {
+ // insert a 2nd NULL HISTINT, with boundaryIncl value FALSE
+ HistInt secondLast (hist->lastHistInt().getBoundary(), FALSE) ;
+ hist->insertAt(count-1,secondLast) ;
+ // new HISTINT by default has row/uec of 0, which is what we want
+ }
+ }
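
As the diagram above shows, NULLs must sit in a single-valued interval, which takes two HistInts on the same (NULL) boundary; if only one NULL HistInt was read in, a second one is spliced in just before it with boundary-inclusive FALSE and zero rows/UEC. A hedged sketch with assumed simplified types:

    #include <vector>

    struct NullableHistInt {
      bool   isNull;
      bool   boundIncl;
      double rows;
      double uec;
    };

    // If the histogram ends in a lone NULL HistInt, insert a second NULL
    // HistInt (non-inclusive, zero rows/UEC) right before the last entry.
    static void ensureTwoNullHistInts(std::vector<NullableHistInt> &h)
    {
      size_t n = h.size();
      if (n >= 2 && h[n - 1].isNull && !h[n - 2].isNull)
        h.insert(h.begin() + (n - 1), {true, false, 0.0, 0.0});
    }
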
- CollIndex partColNum = partCols.entries();
+ //
+ // Now, reduce the total number of intervals to be the number
+ // that the user wants. This is used to test the tradeoffs
+ // between compile time & rowcount estimation.
+ //
+ (*colStats_)[i]->setMaxIntervalCount (maxIntervalCount) ;
+ (*colStats_)[i]->reduceToMaxIntervalCount () ;
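
Capping the interval count trades histogram resolution for compile time. The sketch below only shows the general idea of that trade by folding adjacent buckets until at most maxIntervals remain; the real ColStats::reduceToMaxIntervalCount() applies its own merge policy, which is not reproduced here.

    #include <vector>

    struct BucketStub { double boundary; double rows; double uec; };

    static void reduceToMaxIntervalCount(std::vector<BucketStub> &h,
                                         size_t maxIntervals)
    {
      while (h.size() > maxIntervals && h.size() >= 2) {
        BucketStub last = h.back();
        h.pop_back();
        h.back().rows += last.rows;         // fold the dropped bucket into
        h.back().uec  += last.uec;          // its left neighbor
        h.back().boundary = last.boundary;  // keep the outer boundary
      }
    }
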
- // look for MC histograms that have multiple intervals and whose columns are a prefix for the
- // partition column list. If multiple match, pick the one with the most matching columns
- for (Int32 i=0; i < (*colStats_).entries(); i++)
- {
- NAColumnArray statsCols = (*colStats_)[i]->getStatColumns();
- CollIndex colNum = statsCols.entries();
-
- CollIndex j = 0;
-
- NABoolean potentialMatch = TRUE;
- if ((colNum > currentMaxsize) &&
- (!(*colStats_)[i]->isSingleIntHist()) && // no SIH -- number of histograms is large enough to do splitting
- (colNum <= partColNum))
- {
- while ((j < colNum) && potentialMatch)
- {
- j++;
- NAColumn * col = partCols[j-1];
- if (statsCols[j-1]->getPosition() != partCols[j-1]->getPosition())
- {
- potentialMatch = FALSE;
- break;
- }
- }
- }
- else
- {
- potentialMatch = FALSE;
- }
-
- if (potentialMatch)
- {
- currentMaxsize = j;
- posMCtoUse = i;
- }
-
- // we got what we need, just return
- if (potentialMatch && (currentMaxsize == partColNum))
- {
- break;
- }
- }
-
- if (posMCtoUse >= 0)
- {
- (*colStats_)[posMCtoUse]->setMCforHbasePartitioning (TRUE);
- }
- }
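
The block being removed above picks, among the multi-column histograms, the one whose stat columns form the longest leading prefix of the table's partitioning key columns and flags it for the HBase range-partitioning task only. A hedged sketch of that selection, reduced to integer column positions (the single-interval-histogram and fake-stats exclusions are omitted; names are illustrative):

    #include <vector>

    // Return the index of the multi-column histogram whose columns form the
    // longest prefix (length > 1) of the partitioning key columns, or -1.
    static int pickMCforPartitioning(const std::vector<std::vector<int>> &statCols,
                                     const std::vector<int> &partCols)
    {
      size_t best = 1;    // must cover more than one column to be useful
      int    pick = -1;
      for (size_t i = 0; i < statCols.size(); i++) {
        const std::vector<int> &cols = statCols[i];
        if (cols.size() <= best || cols.size() > partCols.size())
          continue;
        bool isPrefix = true;
        for (size_t j = 0; j < cols.size() && isPrefix; j++)
          isPrefix = (cols[j] == partCols[j]);
        if (isPrefix) {
          best = cols.size();
          pick = (int)i;
          if (best == partCols.size())   // full match, cannot do better
            break;
        }
      }
      return pick;
    }
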
+ if ((*colStats_)[i]->getRowcount() == (*colStats_)[i]->getTotalUec() )
+ (*colStats_)[i]->setAlmostUnique(TRUE);
- // *************************************************************************
- // FIRST: Generate the stats necessary to later create the
- // MultiColumnUecList; then filter out the multi-column histograms
- // because later code doesn't know how to handle them
- // In the same loop, also mark another flag for originally fake histograms.
- // This is to differentiate the case where the histogram is fake because
- // it has no statistics from the case where the histogram has been termed
- // fake by the optimizer because its statistics are no longer reliable.
- // *************************************************************************
- CollIndex i ;
- for ( i = 0 ; i < (*colStats_).entries() ; /* no automatic increment */ )
- {
- // the StatsList has two lists which it uses to store the information we
- // need to fill the MultiColumnUecList with <table-col-list,uec value> pairs:
- //
- // LIST(NAColumnArray) groupUecColumns_
- // LIST(CostScalar) groupUecValues_
- //
- // ==> insert the NAColumnArray & uec total values for each
- // entry in colStats_
-
- // don't bother storing multicolumnuec info for fake histograms
- // but do set the originally fake histogram flag to TRUE
- if ( (*colStats_)[i]->isFakeHistogram() )
- (*colStats_)[i]->setOrigFakeHist(TRUE);
- else
- {
- NAColumnArray cols = (*colStats_)[i]->getStatColumns() ;
- (*colStats_).groupUecColumns_.insert(cols) ;
-
- CostScalar uecs = (*colStats_)[i]->getTotalUec() ;
- (*colStats_).groupUecValues_.insert(uecs) ;
-
- if (CmpCommon::getDefault(USTAT_COLLECT_MC_SKEW_VALUES) == DF_ON)
- {
- MCSkewedValueList mcSkewedValueList = (*colStats_)[i]->getMCSkewedValueList() ;
- (*colStats_).groupMCSkewedValueLists_.insert(mcSkewedValueList) ;
- }
- }
-
- // MCH:
- // once we've stored the column/uec information, filter out the
- // multi-column histograms, since our synthesis code doesn't
- // handle them
- if (( (*colStats_)[i]->getStatColumns().entries() != 1) &&
- (!(*colStats_)[i]->isMCforHbasePartitioning()))
- {
- (*colStats_).removeAt(i) ;
- }
- else
- {
- i++ ; // in-place removal from a list is a bother!
- }
- }
+ } // outer for loop -- done with this COLSTATS, continue with next one
+ // ***********************************************************************
- // *************************************************************************
- // SECOND: do some fixup work to make sure the histograms maintain
- // the semantics we later expect (& enforce)
- // *************************************************************************
-
- // -------------------------------------------------------------------------
- // HISTINT fixup-code : char-string histograms
- // -------------------------------------------------------------------------
- // problem arises with HISTINTs that are for char* columns
- // here's what we can get:
- //
- // Rows Uec Value
- // ---- --- -----
- // 0 0 "value"
- // 10 5 "value"
- //
- // this is not good! The problem is our (lousy) encoding of
- // char strings into EncodedValue's
- //
- // After much deliberation, here's our current fix:
- //
- // Rows Uec Value
- // ---- --- -----
- // 0 0 "valu" <-- reduce the min value of 1st interval
- // 10 5 "value" by a little bit
- //
- // When we find two intervals like this where they aren't the
- // first intervals in the histogram, we simply merge them into
- // one interval (adding row/uec information) and continue; note
- // that in this case, we haven't actually lost any information;
- // we've merely made sense out of (the garbage) what we've got
- //
- // -------------------------------------------------------------------------
- // additional HISTINT fixup-code
- // -------------------------------------------------------------------------
- // 1. If there are zero or one HISTINTs, then set the HISTINTs to match
- // the max/min information contained in the COLSTATS object.
- //
- // 2. If there are any HISTINTs whose boundary values are out-of-order,
- // we abort with an ERROR message.
- //
- // 3. If there is a NULL HISTINT at the end of the Histogram, then we
- // need to make sure there are *TWO* NULL HISTINTS, to preserve correct
- // histogram semantics for single-valued intervals.
- // -------------------------------------------------------------------------
-
- CollIndex j ;
- for ( i = 0 ; i < (*colStats_).entries() ; i++ )
- {
- // we only worry about histograms on char string columns
- // correction: it turns out that these semantically-deranged
- // ---------- histograms were being formed for other, non-char string
- // columns, so we commented out the code below
- // if ( colStats_[i]->getStatColumns()[0]->getType()->getTypeQualifier() !=
- // NA_CHARACTER_TYPE)
- // continue ; // not a string, skip to next
-
- ColStatsSharedPtr stats = (*colStats_)[i] ;
-
- HistogramSharedPtr hist = stats->getHistogramToModify() ;
- // histograms for key columns of a table that are not
- // referenced in the query are read in with zero intervals
- // (to conserve memory); however, internal
- // histogram-semantic checking code assumes that any
- // histogram which has zero intervals is FAKE; however,
- // MDAM will not be chosen in the case where one of
- // the histograms for a key column is FAKE. Thus -- we will
- // avoid this entire issue by creating a single interval for
- // any Histograms that we read in that are empty.
- if ( hist->entries() < 2 )
- {
- if(stats->getMinValue() > stats->getMaxValue())
- {
- *CmpCommon::diags() << DgSqlCode(CATALOG_HISTOGRM_HISTINTS_TABLES_CONTAIN_BAD_VALUE)
- << DgString0("")
- << DgString1(stats->getStatColumns()[0]->getFullColRefNameAsAnsiString().data() );
-
- stats->createFakeHist();
- continue;
- }
-
- stats->setToSingleInterval ( stats->getMinValue(),
- stats->getMaxValue(),
- stats->getRowcount(),
- stats->getTotalUec() ) ;
- // now we have to undo some of the automatic flag-setting
- // of ColStats::setToSingleInterval()
- stats->setMinSetByPred (FALSE) ;
- stats->setMaxSetByPred (FALSE) ;
- stats->setShapeChanged (FALSE) ;
- continue ; // skip to next ColStats
- }
-
- // NB: we'll handle the first Interval last
- for ( j = 1 ; j < hist->entries()-1 ; /* no automatic increment */ )
- {
+ statsFetched_ = TRUE;
+ MonitorMemoryUsage_Exit((char*)mmPhase.data(), NULL, NULL, TRUE);
+ } // !statsFetched_
- if ( (*hist)[j].getUec() == 0 || (*hist)[j].getCardinality() == 0 )
- {
- hist->removeAt(j) ;
- continue ; // don't increment, loop again
- }
-
- // intervals must be in order!
- if ( (*hist)[j].getBoundary() > (*hist)[j+1].getBoundary() )
- {
- *CmpCommon::diags() <<
- DgSqlCode(CATALOG_HISTINTS_TABLES_CONTAIN_BAD_VALUES)
- << DgInt0(j)
- << DgInt1(j+1)
- << DgString1(stats->getStatColumns()[0]->getFullColRefNameAsAnsiString().data() );
-
- stats->createFakeHist();
- break ; // skip to next ColStats
- }
-
- if ( (*hist)[j].getBoundary() == (*hist)[j+1].getBoundary() )
- {
- // merge Intervals, if the two consecutive intervals have the same
- // boundaries and these are not single valued (UEC > 1).
- // If there are more than two single valued intervals, then merge
- // all except the last one.
- NABoolean mergeIntervals = FALSE;
-
- if (CmpCommon::getDefault(COMP_BOOL_79) == DF_ON)
- {
- mergeIntervals = TRUE;
-
- if( (j < (hist->entries() - 2)) && ((*hist)[j+1].getUec() == 1) &&
- ((*hist)[j+1].getBoundary() != (*hist)[j+2].getBoundary())
- ||
- (j == (hist->entries() - 2)) && ((*hist)[j+1].getUec() == 1) )
- mergeIntervals = FALSE;
- }
- else
- {
- if ( (*hist)[j+1].getUec() > 1)
- mergeIntervals = TRUE;
- }
-
- if ( mergeIntervals )
- {
- // if the intervals with same boundary are not SVI, just merge them
- // together.
- // Also do the merge, if there is more than one SVI with the same
- // encoded interval boundary. For example, we want to avoid intervals
- // such as
- // boundary inclusive_flag UEC
- // 12345.00 < 1
- // 12345.00 < 1
- // 12345.00 <= 1
- // These would be changed to
- // 12345.00 < 2
- // 12345.00 <= 1
- CostScalar combinedRows = (*hist)[ j ].getCardinality() +
- (*hist)[j+1].getCardinality() ;
- CostScalar combinedUec = (*hist)[ j ].getUec() +
- (*hist)[j+1].getUec() ;
- (*hist)[j].setCardAndUec (combinedRows, combinedUec) ;
- stats->setIsColWithBndryConflict(TRUE);
- hist->removeAt(j+1) ;
- }
- else
- {
- // for some reason, some SVI's aren't being
- // generated correctly!
- (*hist)[j].setBound
<TRUNCATED>