You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by hz...@apache.org on 2016/01/08 03:02:57 UTC

[1/3] incubator-trafodion git commit: [TRAFODION-1740] Add CQDs to UPDATE STATS for large tables

Repository: incubator-trafodion
Updated Branches:
  refs/heads/master 24d6bdb8a -> 0f302c427


[TRAFODION-1740] Add CQDs to UPDATE STATS for large tables


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/99a143f2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/99a143f2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/99a143f2

Branch: refs/heads/master
Commit: 99a143f29910c7d3c8e02fb1bf101214568bf80a
Parents: 7e1dfca
Author: Dave Birdsall <db...@apache.org>
Authored: Thu Jan 7 21:18:51 2016 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Thu Jan 7 21:18:51 2016 +0000

----------------------------------------------------------------------
 core/sql/ustat/hs_globals.cpp | 75 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/99a143f2/core/sql/ustat/hs_globals.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.cpp b/core/sql/ustat/hs_globals.cpp
index aeb5433..59ae5d4 100644
--- a/core/sql/ustat/hs_globals.cpp
+++ b/core/sql/ustat/hs_globals.cpp
@@ -3718,6 +3718,9 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
 
     LM->StartTimer("Create/populate sample table");
     (void)getTimeDiff(TRUE);
+
+    NABoolean EspCQDUsed = FALSE;
+    NABoolean HBaseCQDsUsed = FALSE;
      
     sampleRowCount = sampleRowCnt;  // Save sample row count for HSSample object.
 
@@ -3762,6 +3765,68 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
     // For Hive tables the sample table used is a Trafodion table
     if (hs_globals->isHbaseTable || hs_globals->isHiveTable)
       {
+        // The optimal degree of parallelism for the LOAD or UPSERT is
+        // the number of partitions of the original table. Force that.
+        // Note that when the default for AGGRESSIVE_ESP_ALLOCATION_PER_CORE
+        // is permanently changed to 'ON', we may be able to remove this CQD.
+        if (hs_globals->objDef->getNumPartitions() > 1)
+          {
+            char temp[40];  // way more space than needed, but it's safe
+            sprintf(temp,"'%d'",hs_globals->objDef->getNumPartitions());
+            NAString EspsCQD = "CONTROL QUERY DEFAULT PARALLEL_NUM_ESPS ";
+            EspsCQD += temp;
+            HSFuncExecQuery(EspsCQD);
+            EspCQDUsed = TRUE;  // remember to reset later
+          }
+
+        // If the table is very large, we risk HBase time-outs because the
+        // sample scan doesn't return rows fast enough. In this case, we
+        // want to reduce the HBase row cache size to a smaller number to
+        // force more frequent returns. Experience shows that a value of
+        // '10' worked well with a 17.7 billion row table with 128 regions
+        // on six nodes (one million row sample). We'll assume a workable
+        // HBase cache size value scales linearly with the sampling ratio.
+        // That is, we'll assume the model:
+        //
+        //   workable value = (sample row count / actual row count) * c,
+        //   where c is chosen so that we get 10 when the sample row count
+        //   is 1,000,000 and the actual row count is 17.7 billion.
+        //
+        //   Solving for c, we get c = 10 * (17.7 billion/1 million).
+        //
+        // Note that the Generator does a similar calculation in
+        // Generator::setHBaseNumCacheRows. The calculation here is more
+        // conservative: we accept a smaller cache size (and possibly a
+        // slower scan) in exchange for getting UPDATE STATISTICS done
+        // without a time-out.
+        //
+        // Note that when we move to HBase 1.1, with its heartbeat protocol,
+        // this time-out problem goes away and we can remove these CQDs.
+        if (hs_globals->isHbaseTable)
+          {
+            double sampleRatio = (double)(sampleRowCnt) / hs_globals->actualRowCount;
+            double calibrationFactor = 10 * (17700000000/1000000);
+            Int64 workableCacheSize = (Int64)(sampleRatio * calibrationFactor);
+            if (workableCacheSize < 1)
+              workableCacheSize = 1;  // can't go below 1 unfortunately
+
+            Int32 max = getDefaultAsLong(HBASE_NUM_CACHE_ROWS_MAX);
+            if ((workableCacheSize < 10000) && // don't bother if 10000 works
+                (max == 10000))  // don't do it if user has already set this CQD
+              {
+                char temp1[40];  // way more space than needed, but it's safe
+                Lng32 wcs = (Lng32)workableCacheSize;  
+                sprintf(temp1,"'%d'",wcs);
+                NAString minCQD = "CONTROL QUERY HBASE_NUM_CACHE_ROWS_MIN ";
+                minCQD += temp1;
+                HSFuncExecQuery(minCQD); 
+                NAString maxCQD = "CONTROL QUERY HBASE_NUM_CACHE_ROWS_MAX ";
+                maxCQD += temp1;
+                HSFuncExecQuery(maxCQD); 
+                HBaseCQDsUsed = TRUE;  // remember to reset these later          
+              }
+          }          
+
         if (CmpCommon::getDefault(TRAF_LOAD_USE_FOR_STATS) == DF_ON)
           {
             insertType = "LOAD WITH NO OUTPUT, NO RECOVERY, NO POPULATE INDEXES, NO DUPLICATE CHECK INTO ";
@@ -3904,6 +3969,16 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
     // Reset the IDENTITY column override CQD
     HSFuncExecQuery("CONTROL QUERY DEFAULT OVERRIDE_GENERATED_IDENTITY_VALUES RESET");                                                                      
 
+    if (HBaseCQDsUsed)
+      {
+        HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN RESET");
+        HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX RESET");
+      }
+    if (EspCQDUsed)
+      {
+        HSFuncExecQuery("CONTROL QUERY DEFAULT PARALLEL_NUM_ESPS RESET");
+      }
+
     if (retcode) TM->Rollback();
     else         TM->Commit();
 


[3/3] incubator-trafodion git commit: [TRAFODION-1740] Add CQDs to UPDATE STATS for large tables

Posted by hz...@apache.org.
[TRAFODION-1740] Add CQDs to UPDATE STATS for large tables


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/0f302c42
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/0f302c42
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/0f302c42

Branch: refs/heads/master
Commit: 0f302c427565aa1b6e77da4099711f37f485850e
Parents: 24d6bdb 384f8fa
Author: Hans Zeller <hz...@apache.org>
Authored: Fri Jan 8 02:01:37 2016 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Fri Jan 8 02:01:37 2016 +0000

----------------------------------------------------------------------
 core/sql/ustat/hs_globals.cpp | 75 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
----------------------------------------------------------------------



[2/3] incubator-trafodion git commit: Rework

Posted by hz...@apache.org.
Rework


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/384f8fa8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/384f8fa8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/384f8fa8

Branch: refs/heads/master
Commit: 384f8fa8e104b9aca9687a676c2f876fe1f4e436
Parents: 99a143f
Author: Dave Birdsall <db...@apache.org>
Authored: Thu Jan 7 23:26:05 2016 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Thu Jan 7 23:26:05 2016 +0000

----------------------------------------------------------------------
 core/sql/ustat/hs_globals.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/384f8fa8/core/sql/ustat/hs_globals.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.cpp b/core/sql/ustat/hs_globals.cpp
index 59ae5d4..75789e9 100644
--- a/core/sql/ustat/hs_globals.cpp
+++ b/core/sql/ustat/hs_globals.cpp
@@ -3817,10 +3817,10 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
                 char temp1[40];  // way more space than needed, but it's safe
                 Lng32 wcs = (Lng32)workableCacheSize;  
                 sprintf(temp1,"'%d'",wcs);
-                NAString minCQD = "CONTROL QUERY HBASE_NUM_CACHE_ROWS_MIN ";
+                NAString minCQD = "CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN ";
                 minCQD += temp1;
                 HSFuncExecQuery(minCQD); 
-                NAString maxCQD = "CONTROL QUERY HBASE_NUM_CACHE_ROWS_MAX ";
+                NAString maxCQD = "CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX ";
                 maxCQD += temp1;
                 HSFuncExecQuery(maxCQD); 
                 HBaseCQDsUsed = TRUE;  // remember to reset these later