You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by db...@apache.org on 2016/11/14 20:02:46 UTC
[1/2] incubator-trafodion git commit: [TRAFODION-2352] UPDATE STATS may fail with error 8446 on Hive tables
Repository: incubator-trafodion
Updated Branches:
refs/heads/master 34772bc17 -> 5162e5a57
[TRAFODION-2352] UPDATE STATS may fail with error 8446 on Hive tables
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/d2e7567b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/d2e7567b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/d2e7567b
Branch: refs/heads/master
Commit: d2e7567b0b1e0b090ac0bfed1f5fa4183bc6f376
Parents: 0cf28ae
Author: Dave Birdsall <db...@apache.org>
Authored: Thu Nov 10 23:51:58 2016 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Thu Nov 10 23:51:58 2016 +0000
----------------------------------------------------------------------
core/sql/ustat/hs_globals.cpp | 87 +++++++++++++++++++++++++++++++-------
core/sql/ustat/hs_globals.h | 18 +++++++-
core/sql/ustat/hs_update.cpp | 1 +
3 files changed, 88 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d2e7567b/core/sql/ustat/hs_globals.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.cpp b/core/sql/ustat/hs_globals.cpp
index b0dbfef..eedfe67 100644
--- a/core/sql/ustat/hs_globals.cpp
+++ b/core/sql/ustat/hs_globals.cpp
@@ -589,12 +589,65 @@ NABoolean HSGlobalsClass::setHBaseCacheSize(double sampleRatio)
NAString maxCQD = "CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX ";
maxCQD += temp1;
HSFuncExecQuery(maxCQD);
+ hbaseCacheSizeCQDsSet_ = TRUE;
return TRUE;
}
else
return FALSE;
}
+
+// If a Hive table has very long character columns, we might get
+// a SQL error 8446 when scanning it. The way around that is to
+// set CQD HIVE_MAX_STRING_LENGTH_IN_BYTES to the longest string
+// length in the table. This is typically done by the user in
+// the sqlci or trafci session. However we need to propagate
+// it to our child tdm_arkcmp. This method does that.
+NABoolean HSGlobalsClass::setHiveMaxStringLengthInBytes(void)
+{
+ NABoolean rc = FALSE;
+ if (isHiveTable)
+ {
+ NADefaults &defs = ActiveSchemaDB()->getDefaults();
+ if (defs.getProvenance(HIVE_MAX_STRING_LENGTH_IN_BYTES) >=
+ NADefaults::SET_BY_CQD)
+ {
+ char temp1[40]; // way more space than needed, but it's safe
+ UInt32 hiveMaxStringLengthInBytes =
+ ActiveSchemaDB()->getDefaults().getAsULong(HIVE_MAX_STRING_LENGTH_IN_BYTES);
+
+ sprintf(temp1,"'%u'",hiveMaxStringLengthInBytes);
+ NAString theCQD = "CONTROL QUERY DEFAULT HIVE_MAX_STRING_LENGTH_IN_BYTES ";
+ theCQD += temp1;
+ HSFuncExecQuery(theCQD);
+
+ hiveMaxStringLengthCQDSet_ = TRUE;
+ rc = TRUE;
+ }
+ }
+
+ return rc;
+}
+
+
+// If we set any CQDs in CollectStatistics that need to be
+// reset when we are done, do that here.
+void HSGlobalsClass::resetCQDs(void)
+{
+ if (hbaseCacheSizeCQDsSet_)
+ {
+ HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN RESET");
+ HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX RESET");
+ hbaseCacheSizeCQDsSet_ = FALSE;
+ }
+ if (hiveMaxStringLengthCQDSet_)
+ {
+ HSFuncExecQuery("CONTROL QUERY DEFAULT HIVE_MAX_STRING_LENGTH_IN_BYTES RESET");
+ hiveMaxStringLengthCQDSet_ = FALSE;
+ }
+}
+
+
// rearrange the MCs so that the larger groups are listed first
// and the ones that will not be processed (not enough memory) are
// listed last, so to simplify the rest of the ordering algorithm
@@ -2896,7 +2949,9 @@ HSGlobalsClass::HSGlobalsClass(ComDiagsArea &diags)
jitLogOn(FALSE),
isUpdatestatsStmt(FALSE),
maxCharColumnLengthInBytes(ActiveSchemaDB()->getDefaults().
- getAsLong(USTAT_MAX_CHAR_COL_LENGTH_IN_BYTES))
+ getAsLong(USTAT_MAX_CHAR_COL_LENGTH_IN_BYTES)),
+ hbaseCacheSizeCQDsSet_(FALSE),
+ hiveMaxStringLengthCQDSet_(FALSE)
{
// Must add the context first in the constructor.
contID_ = AddHSContext(this);
@@ -3980,7 +4035,7 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
// in a batch) to avoid scanner timeout. Reset these after the sample query
// has executed.
if (hs_globals->isHbaseTable)
- HBaseCQDsUsed = HSGlobalsClass::setHBaseCacheSize(samplePercent);
+ HBaseCQDsUsed = hs_globals->setHBaseCacheSize(samplePercent);
if (CmpCommon::getDefault(TRAF_LOAD_USE_FOR_STATS) == DF_ON)
{
@@ -4137,8 +4192,7 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
if (HBaseCQDsUsed)
{
- HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN RESET");
- HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX RESET");
+ hs_globals->resetCQDs();
}
if (EspCQDUsed)
{
@@ -5187,6 +5241,9 @@ Lng32 HSGlobalsClass::CollectStatistics()
HSSample sampleTable(objDef, optFlags & SAMPLE_REQUESTED, sampleTblPercent);
// Initialize variables for sample table. May not be used.
+ // set CQD for Hive if needed
+ setHiveMaxStringLengthInBytes();
+
/*======================================================================*/
/* Perform internal sort if enabled. */
/*======================================================================*/
@@ -5446,11 +5503,6 @@ Lng32 HSGlobalsClass::CollectStatistics()
retcode = readColumnsIntoMem(&cursor, maxRowsToRead);
HSHandleError(retcode);
- if (hbaseCQDsUsed)
- {
- HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN RESET");
- HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX RESET");
- }
checkTime("after reading pending columns into memory for internal sort");
columnSeconds = getTimeDiff() / numColsToProcess; // saved for automation
@@ -12602,12 +12654,11 @@ Int32 HSGlobalsClass::processIUSColumn(T* ptr,
logCBF("after the above key, cbf is:", cbf);
}
#endif
-
- diagsArea << DgSqlCode(UERR_IUS_INSERT_NONMFV_OVERFLOW)
- << DgString0(smplGroup->colSet[0].colname->data());
-
-
+
if (LM->LogNeeded()) {
+ // only issue the warning if logging is turned on
+ diagsArea << DgSqlCode(UERR_IUS_INSERT_NONMFV_OVERFLOW)
+ << DgString0(smplGroup->colSet[0].colname->data());
LM->Log("NONMFV overflow");
LM->StopTimer(); // Need both of these; there are
LM->StopTimer(); // 2 outstanding timer events
@@ -12739,8 +12790,12 @@ if ( x[0] == (unsigned char)255 && x[1] == (unsigned char)127 ) {
// non-mfv value overflows to mfv. bail out.
if (insert_status == CountingBloomFilter::NEW_MFV) {
- diagsArea << DgSqlCode(UERR_IUS_INSERT_NONMFV_OVERFLOW)
- << DgString0(smplGroup->colSet[0].colname->data());
+ if (LM->LogNeeded())
+ {
+ // only issue warning if logging is turned on
+ diagsArea << DgSqlCode(UERR_IUS_INSERT_NONMFV_OVERFLOW)
+ << DgString0(smplGroup->colSet[0].colname->data());
+ }
LM->StopTimer();
return UERR_IUS_INSERT_NONMFV_OVERFLOW;
}
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d2e7567b/core/sql/ustat/hs_globals.h
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.h b/core/sql/ustat/hs_globals.h
index 90aeac9..b9e50aa 100644
--- a/core/sql/ustat/hs_globals.h
+++ b/core/sql/ustat/hs_globals.h
@@ -1339,7 +1339,13 @@ public:
// Set CQDs controlling min/max HBase cache size to minimize risk of
// scanner timeout.
- static NABoolean setHBaseCacheSize(double sampleRatio);
+ NABoolean setHBaseCacheSize(double sampleRatio);
+
+ // Set CQD HIVE_MAX_STRING_LENGTH_IN_BYTES if necessary
+ NABoolean setHiveMaxStringLengthInBytes(void);
+
+ // Reset any CQDs set above
+ void resetCQDs(void);
// Static fns for determining minimum table sizes for sampling, and for
// using lowest sampling rate, under default sampling protocol.
@@ -1656,10 +1662,18 @@ public:
for one instance of persistent
sample table */
- NABoolean sample_I_generated;
+ NABoolean sample_I_generated;
Lng32 maxCharColumnLengthInBytes; /* the value of USTAT_MAX_CHAR_COL_LENGTH_IN_BYTES */
+ // Error recovery flags so we can reset CQDs that we set
+ // during CollectStatistics() (We do this because the
+ // HSHandleError macro commonly used makes it hard to
+ // do the resets reliably in CollectStatistics itself. Sigh.)
+
+ NABoolean hbaseCacheSizeCQDsSet_;
+ NABoolean hiveMaxStringLengthCQDSet_;
+
private:
//++ MV
// special parser flags (see contr. and destr.)
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d2e7567b/core/sql/ustat/hs_update.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_update.cpp b/core/sql/ustat/hs_update.cpp
index 4a4f1a4..38b0802 100644
--- a/core/sql/ustat/hs_update.cpp
+++ b/core/sql/ustat/hs_update.cpp
@@ -457,6 +457,7 @@ Lng32 UpdateStats(char *input, NABoolean requestedByCompiler)
if (hs_globals_obj.StatsNeeded())
{
retcode = hs_globals_obj.CollectStatistics();
+ hs_globals_obj.resetCQDs();
HSExitIfError(retcode);
}
else if (hs_globals_obj.optFlags & IUS_PERSIST)
[2/2] incubator-trafodion git commit: Merge [TRAFODION-2352] PR 830 Fix error 8446 in UPDATE STATS on Hive tables
Posted by db...@apache.org.
Merge [TRAFODION-2352] PR 830 Fix error 8446 in UPDATE STATS on Hive tables
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/5162e5a5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/5162e5a5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/5162e5a5
Branch: refs/heads/master
Commit: 5162e5a577ca9c353714312a28a588bfdccede31
Parents: 34772bc d2e7567
Author: Dave Birdsall <db...@apache.org>
Authored: Mon Nov 14 20:01:32 2016 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Mon Nov 14 20:01:32 2016 +0000
----------------------------------------------------------------------
core/sql/ustat/hs_globals.cpp | 87 +++++++++++++++++++++++++++++++-------
core/sql/ustat/hs_globals.h | 18 +++++++-
core/sql/ustat/hs_update.cpp | 1 +
3 files changed, 88 insertions(+), 18 deletions(-)
----------------------------------------------------------------------