You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by hz...@apache.org on 2016/01/08 03:02:57 UTC
[1/3] incubator-trafodion git commit: [TRAFODION-1740] Add CQDs to
UPDATE STATS for large tables
Repository: incubator-trafodion
Updated Branches:
refs/heads/master 24d6bdb8a -> 0f302c427
[TRAFODION-1740] Add CQDs to UPDATE STATS for large tables
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/99a143f2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/99a143f2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/99a143f2
Branch: refs/heads/master
Commit: 99a143f29910c7d3c8e02fb1bf101214568bf80a
Parents: 7e1dfca
Author: Dave Birdsall <db...@apache.org>
Authored: Thu Jan 7 21:18:51 2016 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Thu Jan 7 21:18:51 2016 +0000
----------------------------------------------------------------------
core/sql/ustat/hs_globals.cpp | 75 ++++++++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/99a143f2/core/sql/ustat/hs_globals.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.cpp b/core/sql/ustat/hs_globals.cpp
index aeb5433..59ae5d4 100644
--- a/core/sql/ustat/hs_globals.cpp
+++ b/core/sql/ustat/hs_globals.cpp
@@ -3718,6 +3718,9 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
LM->StartTimer("Create/populate sample table");
(void)getTimeDiff(TRUE);
+
+ NABoolean EspCQDUsed = FALSE;
+ NABoolean HBaseCQDsUsed = FALSE;
sampleRowCount = sampleRowCnt; // Save sample row count for HSSample object.
@@ -3762,6 +3765,68 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
// For Hive tables the sample table used is a Trafodion table
if (hs_globals->isHbaseTable || hs_globals->isHiveTable)
{
+ // The optimal degree of parallelism for the LOAD or UPSERT is
+ // the number of partitions of the original table. Force that.
+ // Note that when the default for AGGRESSIVE_ESP_ALLOCATION_PER_CORE
+ // is permanently changed to 'ON', we may be able to remove this CQD.
+ if (hs_globals->objDef->getNumPartitions() > 1)
+ {
+ char temp[40]; // way more space than needed, but it's safe
+ sprintf(temp,"'%d'",hs_globals->objDef->getNumPartitions());
+ NAString EspsCQD = "CONTROL QUERY DEFAULT PARALLEL_NUM_ESPS ";
+ EspsCQD += temp;
+ HSFuncExecQuery(EspsCQD);
+ EspCQDUsed = TRUE; // remember to reset later
+ }
+
+ // If the table is very large, we risk HBase time-outs because the
+ // sample scan doesn't return rows fast enough. In this case, we
+ // want to reduce the HBase row cache size to a smaller number to
+ // force more frequent returns. Experience shows that a value of
+ // '10' worked well with a 17.7 billion row table with 128 regions
+ // on six nodes (one million row sample). We'll assume a workable
+ // HBase cache size value scales linearly with the sampling ratio.
+ // That is, we'll assume the model:
+ //
+ // workable value = (sample row count / actual row count) * c,
+ // where c is chosen so that we get 10 when the sample row count
+ // is 1,000,000 and the actual row count is 17.7 billion.
+ //
+ // Solving for c, we get c = 10 * (17.7 billion/1 million).
+ //
+ // Note that the Generator does a similar calculation in
+ // Generator::setHBaseNumCacheRows. The calculation here is more
+ // conservative because we care more about getting UPDATE STATISTICS
+ // done without a timeout; we accept a possibly slower scan in
+ // exchange for the safety of a smaller cache size.
+ //
+ // Note that when we move to HBase 1.1, with its heartbeat protocol,
+ // this time-out problem goes away and we can remove these CQDs.
+ if (hs_globals->isHbaseTable)
+ {
+ double sampleRatio = (double)(sampleRowCnt) / hs_globals->actualRowCount;
+ double calibrationFactor = 10 * (17700000000/1000000);
+ Int64 workableCacheSize = (Int64)(sampleRatio * calibrationFactor);
+ if (workableCacheSize < 1)
+ workableCacheSize = 1; // can't go below 1 unfortunately
+
+ Int32 max = getDefaultAsLong(HBASE_NUM_CACHE_ROWS_MAX);
+ if ((workableCacheSize < 10000) && // don't bother if 10000 works
+ (max == 10000)) // don't do it if user has already set this CQD
+ {
+ char temp1[40]; // way more space than needed, but it's safe
+ Lng32 wcs = (Lng32)workableCacheSize;
+ sprintf(temp1,"'%d'",wcs);
+ NAString minCQD = "CONTROL QUERY HBASE_NUM_CACHE_ROWS_MIN ";
+ minCQD += temp1;
+ HSFuncExecQuery(minCQD);
+ NAString maxCQD = "CONTROL QUERY HBASE_NUM_CACHE_ROWS_MAX ";
+ maxCQD += temp1;
+ HSFuncExecQuery(maxCQD);
+ HBaseCQDsUsed = TRUE; // remember to reset these later
+ }
+ }
+
if (CmpCommon::getDefault(TRAF_LOAD_USE_FOR_STATS) == DF_ON)
{
insertType = "LOAD WITH NO OUTPUT, NO RECOVERY, NO POPULATE INDEXES, NO DUPLICATE CHECK INTO ";
@@ -3904,6 +3969,16 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
// Reset the IDENTITY column override CQD
HSFuncExecQuery("CONTROL QUERY DEFAULT OVERRIDE_GENERATED_IDENTITY_VALUES RESET");
+ if (HBaseCQDsUsed)
+ {
+ HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN RESET");
+ HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX RESET");
+ }
+ if (EspCQDUsed)
+ {
+ HSFuncExecQuery("CONTROL QUERY DEFAULT PARALLEL_NUM_ESPS RESET");
+ }
+
if (retcode) TM->Rollback();
else TM->Commit();
[3/3] incubator-trafodion git commit: [TRAFODION-1740] Add CQDs to
UPDATE STATS for large tables
Posted by hz...@apache.org.
[TRAFODION-1740] Add CQDs to UPDATE STATS for large tables
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/0f302c42
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/0f302c42
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/0f302c42
Branch: refs/heads/master
Commit: 0f302c427565aa1b6e77da4099711f37f485850e
Parents: 24d6bdb 384f8fa
Author: Hans Zeller <hz...@apache.org>
Authored: Fri Jan 8 02:01:37 2016 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Fri Jan 8 02:01:37 2016 +0000
----------------------------------------------------------------------
core/sql/ustat/hs_globals.cpp | 75 ++++++++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
----------------------------------------------------------------------
[2/3] incubator-trafodion git commit: Rework
Posted by hz...@apache.org.
Rework
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/384f8fa8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/384f8fa8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/384f8fa8
Branch: refs/heads/master
Commit: 384f8fa8e104b9aca9687a676c2f876fe1f4e436
Parents: 99a143f
Author: Dave Birdsall <db...@apache.org>
Authored: Thu Jan 7 23:26:05 2016 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Thu Jan 7 23:26:05 2016 +0000
----------------------------------------------------------------------
core/sql/ustat/hs_globals.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/384f8fa8/core/sql/ustat/hs_globals.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.cpp b/core/sql/ustat/hs_globals.cpp
index 59ae5d4..75789e9 100644
--- a/core/sql/ustat/hs_globals.cpp
+++ b/core/sql/ustat/hs_globals.cpp
@@ -3817,10 +3817,10 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
char temp1[40]; // way more space than needed, but it's safe
Lng32 wcs = (Lng32)workableCacheSize;
sprintf(temp1,"'%d'",wcs);
- NAString minCQD = "CONTROL QUERY HBASE_NUM_CACHE_ROWS_MIN ";
+ NAString minCQD = "CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN ";
minCQD += temp1;
HSFuncExecQuery(minCQD);
- NAString maxCQD = "CONTROL QUERY HBASE_NUM_CACHE_ROWS_MAX ";
+ NAString maxCQD = "CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX ";
maxCQD += temp1;
HSFuncExecQuery(maxCQD);
HBaseCQDsUsed = TRUE; // remember to reset these later