You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by db...@apache.org on 2016/11/14 20:02:46 UTC

[1/2] incubator-trafodion git commit: [TRAFODION-2352] UPDATE STATS may fail with error 8446 on Hive tables

Repository: incubator-trafodion
Updated Branches:
  refs/heads/master 34772bc17 -> 5162e5a57


[TRAFODION-2352] UPDATE STATS may fail with error 8446 on Hive tables


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/d2e7567b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/d2e7567b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/d2e7567b

Branch: refs/heads/master
Commit: d2e7567b0b1e0b090ac0bfed1f5fa4183bc6f376
Parents: 0cf28ae
Author: Dave Birdsall <db...@apache.org>
Authored: Thu Nov 10 23:51:58 2016 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Thu Nov 10 23:51:58 2016 +0000

----------------------------------------------------------------------
 core/sql/ustat/hs_globals.cpp | 87 +++++++++++++++++++++++++++++++-------
 core/sql/ustat/hs_globals.h   | 18 +++++++-
 core/sql/ustat/hs_update.cpp  |  1 +
 3 files changed, 88 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d2e7567b/core/sql/ustat/hs_globals.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.cpp b/core/sql/ustat/hs_globals.cpp
index b0dbfef..eedfe67 100644
--- a/core/sql/ustat/hs_globals.cpp
+++ b/core/sql/ustat/hs_globals.cpp
@@ -589,12 +589,65 @@ NABoolean HSGlobalsClass::setHBaseCacheSize(double sampleRatio)
       NAString maxCQD = "CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX ";
       maxCQD += temp1;
       HSFuncExecQuery(maxCQD);
+      hbaseCacheSizeCQDsSet_ = TRUE;
       return TRUE;
     }
   else
     return FALSE;
 }
 
+
+// Hive tables with very long character columns can fail with SQL error
+// 8446 when scanned unless CQD HIVE_MAX_STRING_LENGTH_IN_BYTES is set to
+// the longest string length in the table. Users typically set it in
+// their sqlci or trafci session; this method propagates it to our child
+// tdm_arkcmp. Returns TRUE (and sets hiveMaxStringLengthCQDSet_) only for
+// a Hive table whose CQD provenance is at least SET_BY_CQD; else FALSE.
+NABoolean HSGlobalsClass::setHiveMaxStringLengthInBytes(void)
+{
+  NABoolean rc = FALSE;
+  if (isHiveTable)
+    {
+      NADefaults &defs = ActiveSchemaDB()->getDefaults();
+      if (defs.getProvenance(HIVE_MAX_STRING_LENGTH_IN_BYTES) >= 
+          NADefaults::SET_BY_CQD)
+        {
+          char temp1[40];  // way more space than needed, but it's safe
+          UInt32 hiveMaxStringLengthInBytes = 
+            ActiveSchemaDB()->getDefaults().getAsULong(HIVE_MAX_STRING_LENGTH_IN_BYTES);
+
+          sprintf(temp1,"'%u'",hiveMaxStringLengthInBytes);
+          NAString theCQD = "CONTROL QUERY DEFAULT HIVE_MAX_STRING_LENGTH_IN_BYTES ";
+          theCQD += temp1;
+          HSFuncExecQuery(theCQD);
+
+          hiveMaxStringLengthCQDSet_ = TRUE;
+          rc = TRUE;
+        }
+    }
+
+  return rc;
+}
+
+
+// Reset any CQDs recorded as set (HBase cache-size min/max, Hive max
+// string length) and clear their flags, so a repeat call does nothing.
+void HSGlobalsClass::resetCQDs(void)
+{
+  if (hbaseCacheSizeCQDsSet_)
+    {
+      HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN RESET");
+      HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX RESET");
+      hbaseCacheSizeCQDsSet_ = FALSE;
+    }
+  if (hiveMaxStringLengthCQDSet_)
+    {
+      HSFuncExecQuery("CONTROL QUERY DEFAULT HIVE_MAX_STRING_LENGTH_IN_BYTES RESET");
+      hiveMaxStringLengthCQDSet_ = FALSE;
+    }
+}
+
+
 // rearrange the MCs so that the larger groups are listed first
 // and the ones that will not be processed (not enough memory) are
 // listed last, so to simplify the rest of the ordering algorithm
@@ -2896,7 +2949,9 @@ HSGlobalsClass::HSGlobalsClass(ComDiagsArea &diags)
     jitLogOn(FALSE),
     isUpdatestatsStmt(FALSE),
     maxCharColumnLengthInBytes(ActiveSchemaDB()->getDefaults().
-               getAsLong(USTAT_MAX_CHAR_COL_LENGTH_IN_BYTES))
+               getAsLong(USTAT_MAX_CHAR_COL_LENGTH_IN_BYTES)),
+    hbaseCacheSizeCQDsSet_(FALSE),
+    hiveMaxStringLengthCQDSet_(FALSE)
   {
     // Must add the context first in the constructor.
     contID_ = AddHSContext(this);
@@ -3980,7 +4035,7 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
         // in a batch) to avoid scanner timeout. Reset these after the sample query
         // has executed.
         if (hs_globals->isHbaseTable)
-          HBaseCQDsUsed = HSGlobalsClass::setHBaseCacheSize(samplePercent);
+          HBaseCQDsUsed = hs_globals->setHBaseCacheSize(samplePercent);
 
         if (CmpCommon::getDefault(TRAF_LOAD_USE_FOR_STATS) == DF_ON)
           {
@@ -4137,8 +4192,7 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
 
     if (HBaseCQDsUsed)
       {
-        HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN RESET");
-        HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX RESET");
+        hs_globals->resetCQDs();
       }
     if (EspCQDUsed)
       {
@@ -5187,6 +5241,9 @@ Lng32 HSGlobalsClass::CollectStatistics()
     HSSample sampleTable(objDef, optFlags & SAMPLE_REQUESTED, sampleTblPercent);
       // Initialize variables for sample table.  May not be used.
 
+    // set CQD for Hive if needed
+    setHiveMaxStringLengthInBytes();
+
     /*======================================================================*/
     /* Perform internal sort if enabled.                                    */
     /*======================================================================*/
@@ -5446,11 +5503,6 @@ Lng32 HSGlobalsClass::CollectStatistics()
    
             retcode = readColumnsIntoMem(&cursor, maxRowsToRead);
             HSHandleError(retcode);
-            if (hbaseCQDsUsed)
-              {
-                HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN RESET");
-                HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX RESET");
-              }
             checkTime("after reading pending columns into memory for internal sort");
             columnSeconds = getTimeDiff() / numColsToProcess;  // saved for automation
 
@@ -12602,12 +12654,11 @@ Int32 HSGlobalsClass::processIUSColumn(T* ptr,
                logCBF("after the above key, cbf is:", cbf);
             }
 #endif
-
-            diagsArea << DgSqlCode(UERR_IUS_INSERT_NONMFV_OVERFLOW)
-                      << DgString0(smplGroup->colSet[0].colname->data());
-
-             
+  
             if (LM->LogNeeded()) {
+              // only issue the warning if logging is turned on
+              diagsArea << DgSqlCode(UERR_IUS_INSERT_NONMFV_OVERFLOW)
+                << DgString0(smplGroup->colSet[0].colname->data());
               LM->Log("NONMFV overflow");
               LM->StopTimer();  // Need both of these; there are
               LM->StopTimer();  //   2 outstanding timer events
@@ -12739,8 +12790,12 @@ if ( x[0] == (unsigned char)255 && x[1] == (unsigned char)127 ) {
 
       // non-mfv value overflows to mfv. bail out.
       if (insert_status == CountingBloomFilter::NEW_MFV) {
-         diagsArea << DgSqlCode(UERR_IUS_INSERT_NONMFV_OVERFLOW)
-                   << DgString0(smplGroup->colSet[0].colname->data());
+         if (LM->LogNeeded())
+           {
+             // only issue warning if logging is turned on
+             diagsArea << DgSqlCode(UERR_IUS_INSERT_NONMFV_OVERFLOW)
+                       << DgString0(smplGroup->colSet[0].colname->data());
+           }
          LM->StopTimer();
          return UERR_IUS_INSERT_NONMFV_OVERFLOW;
       }

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d2e7567b/core/sql/ustat/hs_globals.h
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.h b/core/sql/ustat/hs_globals.h
index 90aeac9..b9e50aa 100644
--- a/core/sql/ustat/hs_globals.h
+++ b/core/sql/ustat/hs_globals.h
@@ -1339,7 +1339,13 @@ public:
 
     // Set CQDs controlling min/max HBase cache size to minimize risk of
     // scanner timeout.
-    static NABoolean setHBaseCacheSize(double sampleRatio);
+    NABoolean setHBaseCacheSize(double sampleRatio);
+
+    // Set CQD HIVE_MAX_STRING_LENGTH_IN_BYTES if necessary
+    NABoolean setHiveMaxStringLengthInBytes(void);
+
+    // Reset any CQDs set above
+    void resetCQDs(void);
 
     // Static fns for determining minimum table sizes for sampling, and for
     // using lowest sampling rate, under default sampling protocol.
@@ -1656,10 +1662,18 @@ public:
                                                           for one instance of persistent 
                                                           sample table */
 
-     NABoolean            sample_I_generated;
+    NABoolean            sample_I_generated;
 
     Lng32          maxCharColumnLengthInBytes;   /* the value of USTAT_MAX_CHAR_COL_LENGTH_IN_BYTES */
 
+    // Error recovery flags so we can reset CQDs that we set
+    // during CollectStatistics(). (We do this because the
+    // HSHandleError macro commonly used makes it hard to
+    // do the resets reliably in CollectStatistics itself. Sigh.)
+
+    NABoolean hbaseCacheSizeCQDsSet_;
+    NABoolean hiveMaxStringLengthCQDSet_;
+
 private:
     //++ MV
     // special parser flags (see contr. and destr.)

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d2e7567b/core/sql/ustat/hs_update.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_update.cpp b/core/sql/ustat/hs_update.cpp
index 4a4f1a4..38b0802 100644
--- a/core/sql/ustat/hs_update.cpp
+++ b/core/sql/ustat/hs_update.cpp
@@ -457,6 +457,7 @@ Lng32 UpdateStats(char *input, NABoolean requestedByCompiler)
     if (hs_globals_obj.StatsNeeded())
       {
         retcode = hs_globals_obj.CollectStatistics();
+        hs_globals_obj.resetCQDs();
         HSExitIfError(retcode);
       }
     else if (hs_globals_obj.optFlags & IUS_PERSIST)


[2/2] incubator-trafodion git commit: Merge [TRAFODION-2352] PR 830 Fix error 8446 in UPDATE STATS on Hive tables

Posted by db...@apache.org.
Merge [TRAFODION-2352] PR 830 Fix error 8446 in UPDATE STATS on Hive tables


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/5162e5a5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/5162e5a5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/5162e5a5

Branch: refs/heads/master
Commit: 5162e5a577ca9c353714312a28a588bfdccede31
Parents: 34772bc d2e7567
Author: Dave Birdsall <db...@apache.org>
Authored: Mon Nov 14 20:01:32 2016 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Mon Nov 14 20:01:32 2016 +0000

----------------------------------------------------------------------
 core/sql/ustat/hs_globals.cpp | 87 +++++++++++++++++++++++++++++++-------
 core/sql/ustat/hs_globals.h   | 18 +++++++-
 core/sql/ustat/hs_update.cpp  |  1 +
 3 files changed, 88 insertions(+), 18 deletions(-)
----------------------------------------------------------------------