You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by sa...@apache.org on 2015/10/27 05:27:50 UTC

[2/3] incubator-trafodion git commit: Rework for issues found by Dave and Selva. The flush size is now set in units of KB, with a default of 1024KB. Explain will report it in rows. The size in rows, as sent to the executor, is capped at 32767.

Rework for issues found by Dave and Selva.
The flush size is now set in units of KB, with a default of 1024KB.
Explain will report it in rows. The size in rows, as sent to the executor,
is capped at 32767.


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/9dd48bed
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/9dd48bed
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/9dd48bed

Branch: refs/heads/master
Commit: 9dd48bed51df2a07a977eca2f13e2e68b4a599c4
Parents: f8e41b9
Author: Suresh Subbiah <su...@apache.org>
Authored: Mon Oct 26 17:09:42 2015 +0000
Committer: Suresh Subbiah <su...@apache.org>
Committed: Mon Oct 26 17:09:42 2015 +0000

----------------------------------------------------------------------
 core/sql/executor/ExHbaseAccess.cpp |  4 ++--
 core/sql/executor/ExHbaseAccess.h   |  6 +++---
 core/sql/generator/GenRelUpdate.cpp | 21 ++++++++-------------
 core/sql/regress/hive/EXPECTED020   | 14 +++++++-------
 core/sql/regress/hive/TEST020       |  4 ++--
 core/sql/sqlcomp/DefaultConstants.h |  2 +-
 core/sql/sqlcomp/nadefaults.cpp     |  2 +-
 7 files changed, 24 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9dd48bed/core/sql/executor/ExHbaseAccess.cpp
----------------------------------------------------------------------
diff --git a/core/sql/executor/ExHbaseAccess.cpp b/core/sql/executor/ExHbaseAccess.cpp
index 4bcbfdd..8e98daa 100644
--- a/core/sql/executor/ExHbaseAccess.cpp
+++ b/core/sql/executor/ExHbaseAccess.cpp
@@ -2315,7 +2315,7 @@ void ExHbaseAccessTcb::allocateDirectBufferForJNI(UInt32 rowLen)
 
 
 void ExHbaseAccessTcb::allocateDirectRowBufferForJNI(
-                      short numCols, UInt16 maxRows)
+                      short numCols, short maxRows)
 {
   UInt32 directBufferOverhead;
   UInt32 maxRowLen;
@@ -2371,7 +2371,7 @@ short ExHbaseAccessTcb::patchDirectRowIDBuffers()
   return numRowsInBuffer;
 }
 
-void ExHbaseAccessTcb::allocateDirectRowIDBufferForJNI(UInt16 maxRows)
+void ExHbaseAccessTcb::allocateDirectRowIDBufferForJNI(short maxRows)
 {
    UInt32 rowIDLen;
    UInt32 maxRowIDLen;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9dd48bed/core/sql/executor/ExHbaseAccess.h
----------------------------------------------------------------------
diff --git a/core/sql/executor/ExHbaseAccess.h b/core/sql/executor/ExHbaseAccess.h
index b7cf604..9271227 100644
--- a/core/sql/executor/ExHbaseAccess.h
+++ b/core/sql/executor/ExHbaseAccess.h
@@ -342,10 +342,10 @@ protected:
   void setRowID(char *rowId, Lng32 rowIdLen);
   void allocateDirectBufferForJNI(UInt32 rowLen);
   void allocateDirectRowBufferForJNI(short numCols, 
-                          UInt16 maxRows = 1);
+                          short maxRows = 1);
   short patchDirectRowBuffers();
   short patchDirectRowIDBuffers();
-  void allocateDirectRowIDBufferForJNI(UInt16 maxRows = 1);
+  void allocateDirectRowIDBufferForJNI(short maxRows = 1);
   Lng32 copyColToDirectBuffer( BYTE *rowCurPtr, 
                 char *colName, short colNameLen,
                 NABoolean prependNullVal, char nullVal, 
@@ -464,7 +464,7 @@ protected:
   //
   BYTE *directRowBuffer_;
   Lng32 directRowBufferLen_;
-  UInt16 directBufferMaxRows_;
+  short directBufferMaxRows_;
   // Structure to keep track of current row
   HbaseStr row_;
   // Structure to keep track of current position in direct row buffer

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9dd48bed/core/sql/generator/GenRelUpdate.cpp
----------------------------------------------------------------------
diff --git a/core/sql/generator/GenRelUpdate.cpp b/core/sql/generator/GenRelUpdate.cpp
index 07944d3..6701094 100644
--- a/core/sql/generator/GenRelUpdate.cpp
+++ b/core/sql/generator/GenRelUpdate.cpp
@@ -2835,19 +2835,14 @@ short HbaseInsert::codeGen(Generator *generator)
         hbasescan_tdb->setNoDuplicates(CmpCommon::getDefault(TRAF_LOAD_PREP_SKIP_DUPLICATES) == DF_OFF);
         hbasescan_tdb->setMaxHFileSize(CmpCommon::getDefaultLong(TRAF_LOAD_MAX_HFILE_SIZE));
 
-	ULng32 loadFlushSize = getDefault(TRAF_LOAD_FLUSH_SIZE_IN_ROWS);
-	if (loadFlushSize == 0) 
-	{// user has not specified a size, assume 1MB buffer is optimal
-
-	  loadFlushSize = (1024*1024)/hbasescan_tdb->getRowLen() ;
-	  if (loadFlushSize > getMaxCardEst().value()) {
-	    // for small tables go back to previous default
-	    loadFlushSize = getDefault(HBASE_ROWSET_VSBB_SIZE);
-	  }
-	}
-	if (loadFlushSize > USHRT_MAX) // largest flush size, runtime cannot
-	  loadFlushSize = USHRT_MAX; // handle higher values without code change
-	hbasescan_tdb->setTrafLoadFlushSize(loadFlushSize);
+	ULng32 loadFlushSizeinKB = getDefault(TRAF_LOAD_FLUSH_SIZE_IN_KB);
+	ULng32 loadFlushSizeinRows = 0;
+	loadFlushSizeinRows = (loadFlushSizeinKB*1024)/hbasescan_tdb->getRowLen() ;
+	// largest flush size, runtime cannot handle higher values 
+	// without code change
+	if (loadFlushSizeinRows >= USHRT_MAX/2)
+	  loadFlushSizeinRows = ((USHRT_MAX/2)-1);
+	hbasescan_tdb->setTrafLoadFlushSize(loadFlushSizeinRows);
 
         // For sample file, set the sample location in HDFS and the sampling rate.
         // Move later, when sampling not limited to bulk loads.

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9dd48bed/core/sql/regress/hive/EXPECTED020
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/EXPECTED020 b/core/sql/regress/hive/EXPECTED020
index 8f07fda..636d75e 100644
--- a/core/sql/regress/hive/EXPECTED020
+++ b/core/sql/regress/hive/EXPECTED020
@@ -20,8 +20,8 @@
 +>         cast(LEFT_CHILD_SEQ_NUM as char(2)) lc,
 +>         cast(RIGHT_CHILD_SEQ_NUM as char(2)) rc,
 +>         substring
-+>         (substring(tname from (1+locate('.',tname))),
-+>         (locate('.',substring(tname from (1+locate('.',tname))))),
++>         (substring(substring(tname from (1+locate('.',tname))),1,case locate(')',tname) when 0 then 0 else locate(')',substring(tname from (1+locate('.',tname))))-1 end),
++>         (locate('.',substring(tname from (1+locate('.',tname)))))+1,
 +>         10
 +>        ) tab_name
 +>         from table (explain(NULL,'XX'))
@@ -35,7 +35,7 @@
 >>invoke hive.hive.store_orc;
 
 -- Definition of hive table STORE_ORC
--- Definition current  Wed Oct 21 00:08:18 2015
+-- Definition current  Mon Oct 26 16:10:15 2015
 
   (
     S_STORE_SK                       INT
@@ -159,8 +159,8 @@ S   OPERATOR          LC  RC  TAB_NAME
 
 04  ROOT              3   ?             
 03  HYBRID_HASH_JOIN  2   1             
-02  HIVE_SCAN         ?   ?   .STORE_ORC
-01  HIVE_SCAN         ?   ?   .STORE_ORC
+02  HIVE_SCAN         ?   ?   STORE_ORC 
+01  HIVE_SCAN         ?   ?   STORE_ORC 
 
 --- 4 row(s) selected.
 >>
@@ -198,8 +198,8 @@ S   OPERATOR          LC  RC  TAB_NAME
 
 04  ROOT              3   ?             
 03  NESTED_JOIN       1   2             
-02  HIVE_SCAN         ?   ?   .STORE_ORC
-01  HIVE_SCAN         ?   ?   .STORE)   
+02  HIVE_SCAN         ?   ?   STORE_ORC 
+01  HIVE_SCAN         ?   ?   STORE     
 
 --- 4 row(s) selected.
 >>

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9dd48bed/core/sql/regress/hive/TEST020
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/TEST020 b/core/sql/regress/hive/TEST020
index ded1d98..50615db 100644
--- a/core/sql/regress/hive/TEST020
+++ b/core/sql/regress/hive/TEST020
@@ -42,8 +42,8 @@ prepare explainIt from
          cast(LEFT_CHILD_SEQ_NUM as char(2)) lc,
          cast(RIGHT_CHILD_SEQ_NUM as char(2)) rc,
          substring
-         (substring(tname from (1+locate('.',tname))),
-         (locate('.',substring(tname from (1+locate('.',tname))))),
+         (substring(substring(tname from (1+locate('.',tname))),1,case locate(')',tname) when 0 then 0 else locate(')',substring(tname from (1+locate('.',tname))))-1 end),
+         (locate('.',substring(tname from (1+locate('.',tname)))))+1,
          10
         ) tab_name
          from table (explain(NULL,'XX'))

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9dd48bed/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/DefaultConstants.h b/core/sql/sqlcomp/DefaultConstants.h
index cbe4fcc..fe28164 100644
--- a/core/sql/sqlcomp/DefaultConstants.h
+++ b/core/sql/sqlcomp/DefaultConstants.h
@@ -3766,7 +3766,7 @@ enum DefaultConstants
   // costing code has broader exposure.
   HBASE_DELETE_COSTING,
   HBASE_UPDATE_COSTING,
-  TRAF_LOAD_FLUSH_SIZE_IN_ROWS,
+  TRAF_LOAD_FLUSH_SIZE_IN_KB,
 
   // This enum constant must be the LAST one in the list; it's a count,
   // not an Attribute (it's not IN DefaultDefaults; it's the SIZE of it)!

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9dd48bed/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp
index e3ad370..95b26b6 100644
--- a/core/sql/sqlcomp/nadefaults.cpp
+++ b/core/sql/sqlcomp/nadefaults.cpp
@@ -3317,7 +3317,7 @@ XDDkwd__(SUBQUERY_UNNESTING,			"ON"),
   DD_____(TRAF_LOAD_ERROR_COUNT_ID,             "" ),
   DD_____(TRAF_LOAD_ERROR_COUNT_TABLE,          "ERRORCOUNTER" ),
   DD_____(TRAF_LOAD_ERROR_LOGGING_LOCATION,     "/bulkload/logs/" ),
-  DDint__(TRAF_LOAD_FLUSH_SIZE_IN_ROWS,         "0"), // in # rows
+  DDint__(TRAF_LOAD_FLUSH_SIZE_IN_KB,           "1024"),
   DDkwd__(TRAF_LOAD_FORCE_CIF,                  "ON"),
   DDkwd__(TRAF_LOAD_LOG_ERROR_ROWS,             "OFF"),
   DDint__(TRAF_LOAD_MAX_ERROR_ROWS,             "0"),