You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by hz...@apache.org on 2016/02/01 22:49:33 UTC

[1/9] incubator-trafodion git commit: for jira 1720, add support to convert gbk into utf8

Repository: incubator-trafodion
Updated Branches:
  refs/heads/master aa3deffb1 -> 9cc1e835f


For JIRA 1720: add support for converting GBK to UTF-8


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/d51d2016
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/d51d2016
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/d51d2016

Branch: refs/heads/master
Commit: d51d2016d29725e3d863b351b3a548ee1b325c0c
Parents: 141f354
Author: Cloud User <ce...@ming01.novalocal>
Authored: Sat Jan 9 11:43:21 2016 +0000
Committer: Cloud User <ce...@ming01.novalocal>
Committed: Sat Jan 9 11:43:21 2016 +0000

----------------------------------------------------------------------
 core/sql/common/csconvert.cpp       | 42 ++++++++++++++++++++++++++++++++
 core/sql/common/csconvert.h         |  4 +++
 core/sql/exp/exp_clause_derived.h   |  4 ++-
 core/sql/exp/exp_conv.cpp           | 26 ++++++++++++++++++++
 core/sql/generator/GenItemFunc.cpp  |  3 +++
 core/sql/optimizer/ItemExpr.cpp     |  2 ++
 core/sql/optimizer/ItemFunc.h       |  1 +
 core/sql/optimizer/SynthType.cpp    | 12 +++++++++
 core/sql/sqlcomp/DefaultConstants.h |  3 +++
 core/sql/sqlcomp/nadefaults.cpp     |  2 ++
 10 files changed, 98 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index ffc5370..b0d21af 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -30,6 +30,10 @@
 //       but also used by the ODBC build and maybe others.
 
 #include <limits.h>
+#include <iconv.h>
+#include <stdio.h>
+#include <stdlib.h>
+
 #include "multi-byte.h"
 #include "fcconv.h"
 #include "csconvert.h"
@@ -1275,3 +1279,41 @@ char * findStartOfChar( char *someByteInChar, char *startOfBuffer )
      rtnv-- ;
   return rtnv ;
 }
+/* A method to do character set conversion , using Glibc iconv */
+int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen)
+{
+  iconv_t cd;
+  int rc;
+  char **pin = &inbuf;
+  char **pout = &outbuf;
+
+  cd = iconv_open(to_charset,from_charset);
+  if (cd==0) return -1;
+  memset(outbuf,0,outlen);
+  if (iconv(cd,pin,(size_t*)&inlen,pout,(size_t *)&outlen)==-1) 
+  {
+    iconv_close(cd);
+    return -1;
+  }
+  iconv_close(cd);
+  return outlen;
+}
+/* from gbk to utf8 */
+int gbk2utf8(char *inbuf,size_t inlen,char *outbuf,size_t outlen)
+{
+  return code_convert("gbk","utf-8",inbuf,inlen,outbuf,outlen);
+}
+
+int gbkToUtf8(char* gbkString, size_t gbklen, 
+              char* result ,size_t outlen, int addNullAtEnd)
+{
+
+   int finalLength = gbk2utf8 ( gbkString, gbklen,  result, outlen);
+   
+   if (finalLength == -1 ) return 0;
+   
+   if ( addNullAtEnd > 0 )
+      result[finalLength] = 0;
+
+   return finalLength;
+}

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/common/csconvert.h
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.h b/core/sql/common/csconvert.h
index 56c9603..57fec71 100644
--- a/core/sql/common/csconvert.h
+++ b/core/sql/common/csconvert.h
@@ -106,6 +106,10 @@ int  UTF16ToLocale( const enum cnv_version version,
                     unsigned int * translated_char_cnt_p = NULL  ,
                     const char *substitution_char        = NULL );
 
+NA_EIDPROC
+int gbkToUtf8(char* gbkString, size_t gbklen,
+              char* result ,size_t outlen, int addNullAtEnd=FALSE);
+
 /*
  * LocaleCharToUCS4() converts the FIRST char in the input string to its
  * UCS4 value.  Returns the UCS4 value at location specified AND the

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/exp/exp_clause_derived.h
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_clause_derived.h b/core/sql/exp/exp_clause_derived.h
index fc6ecfe..4558431 100644
--- a/core/sql/exp/exp_clause_derived.h
+++ b/core/sql/exp/exp_clause_derived.h
@@ -1542,7 +1542,9 @@ enum conv_case_index {
   CONV_UTF8_F_UCS2_V                   =248,
 
   CONV_BLOB_BLOB                       =249,
-  CONV_BLOB_ASCII_F                    =250
+  CONV_BLOB_ASCII_F                    =250,
+
+  CONV_GBK_F_UTF8_V                    =251
 };
 
 class SQLEXP_LIB_FUNC  ex_conv_clause : public ex_clause {

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/exp/exp_conv.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp
index 8c31efc..015ec0f 100644
--- a/core/sql/exp/exp_conv.cpp
+++ b/core/sql/exp/exp_conv.cpp
@@ -9321,6 +9321,32 @@ convDoIt(char * source,
   };
   break;
 
+// gb2312 -> utf8
+  case CONV_GBK_F_UTF8_V:
+  {
+    char * targetbuf = new char[sourceLen*4+1];
+    size_t sl = sourceLen;
+    int convLen = gbkToUtf8( source, sl, targetbuf, sl*4);
+    int copyLen = 0;
+    if (convLen > 0) {
+      copyLen = (convLen< targetLen) ? convLen: targetLen;
+      str_cpy_all(target, targetbuf, copyLen);
+    //  if (convLen > targetLen)
+
+    }
+    else {
+      // LCOV_EXCL_START
+      convLen = 0;
+      copyLen = 0;
+      // LCOV_EXCL_STOP
+    }
+
+    if ( varCharLen )
+       setVCLength(varCharLen, varCharLenSize, copyLen);
+    delete targetbuf;
+
+  };
+  break;
 // 5/10/98: sjis -> unicode
   case CONV_SJIS_F_UNICODE_F: 
   case CONV_SJIS_F_UNICODE_V: 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/generator/GenItemFunc.cpp
----------------------------------------------------------------------
diff --git a/core/sql/generator/GenItemFunc.cpp b/core/sql/generator/GenItemFunc.cpp
index c7e6748..8c8f7e6 100644
--- a/core/sql/generator/GenItemFunc.cpp
+++ b/core/sql/generator/GenItemFunc.cpp
@@ -1737,6 +1737,9 @@ short Translate::codeGen(Generator * generator)
      case UCS2_TO_UTF8:
 	convType = CONV_UCS2_F_UTF8_V;
 	break;
+     case GBK_TO_UTF8:
+        convType = CONV_GBK_F_UTF8_V;
+        break;
      case UNICODE_TO_ISO88591:
 	convType = CONV_UNICODE_F_ASCII_V;
 	break;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/ItemExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ItemExpr.cpp b/core/sql/optimizer/ItemExpr.cpp
index 41b6b76..7df1a7e 100644
--- a/core/sql/optimizer/ItemExpr.cpp
+++ b/core/sql/optimizer/ItemExpr.cpp
@@ -13408,6 +13408,8 @@ Translate::Translate(ItemExpr *valPtr, NAString* map_table_name)
     map_table_id_ = Translate::SJIS_TO_UTF8;
   else if ( _strcmpi(map_table_name->data(), "UTF8TOSJIS") == 0 )
     map_table_id_ = Translate::UTF8_TO_SJIS;
+  else if ( _strcmpi(map_table_name->data(), "GBKTOUTF8") == 0 )
+    map_table_id_ = Translate::GBK_TO_UTF8;
 
                 else
                   if ( _strcmpi(map_table_name->data(), "KANJITOISO88591") == 0 )

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/ItemFunc.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ItemFunc.h b/core/sql/optimizer/ItemFunc.h
index 3bb7238..d794a3d 100644
--- a/core/sql/optimizer/ItemFunc.h
+++ b/core/sql/optimizer/ItemFunc.h
@@ -2112,6 +2112,7 @@ public:
         UTF8_TO_SJIS, SJIS_TO_UTF8, UTF8_TO_ISO88591,
         ISO88591_TO_UTF8,
         KANJI_MP_TO_ISO88591, KSC5601_MP_TO_ISO88591,
+        GBK_TO_UTF8,
         UNKNOWN_TRANSLATION};
 
   Translate(ItemExpr *valPtr, NAString* map_table_name);

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/SynthType.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/SynthType.cpp b/core/sql/optimizer/SynthType.cpp
index b9d5518..56e341f 100644
--- a/core/sql/optimizer/SynthType.cpp
+++ b/core/sql/optimizer/SynthType.cpp
@@ -5207,6 +5207,18 @@ const NAType *Translate::synthesizeType()
          err4106arg = SQLCHARSETSTRING_UTF8;
        break;
 
+     case GBK_TO_UTF8:
+       if (translateSource->getCharSet() == CharInfo::GBK || translateSource->getCharSet() == CharInfo::UnknownCharSet )
+         charsetTarget = CharInfo::UTF8;
+       else
+       {
+            if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) ==  CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) )
+              err4106arg = SQLCHARSETCODE_GB2312;
+            else
+             charsetTarget = CharInfo::UTF8;
+       }
+       break;
+
      case ISO88591_TO_UTF8:
        if (translateSource->getCharSet() == CharInfo::ISO88591)
        {

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/DefaultConstants.h b/core/sql/sqlcomp/DefaultConstants.h
index 2778197..d10acc8 100644
--- a/core/sql/sqlcomp/DefaultConstants.h
+++ b/core/sql/sqlcomp/DefaultConstants.h
@@ -3785,6 +3785,9 @@ enum DefaultConstants
   // set to ON to aggressively allocate ESP per core
   AGGRESSIVE_ESP_ALLOCATION_PER_CORE,
 
+  // real charset in the HIVE table
+  HIVE_FILE_CHARSET,
+
   // This enum constant must be the LAST one in the list; it's a count,
   // not an Attribute (it's not IN DefaultDefaults; it's the SIZE of it)!
   __NUM_DEFAULT_ATTRIBUTES

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp
index f6d5604..cfe2cf3 100644
--- a/core/sql/sqlcomp/nadefaults.cpp
+++ b/core/sql/sqlcomp/nadefaults.cpp
@@ -1958,6 +1958,7 @@ SDDkwd__(EXE_DIAGNOSTIC_EVENTS,		"OFF"),
 
   DDkwd__(HIVE_DEFAULT_CHARSET,            (char *)SQLCHARSETSTRING_UTF8),
   DD_____(HIVE_DEFAULT_SCHEMA,                  "HIVE"),
+  DD_____(HIVE_FILE_CHARSET,            (char *)SQLCHARSETSTRING_UTF8),
   DD_____(HIVE_FILE_NAME,     "/hive/tpcds/customer/customer.dat" ),
   DD_____(HIVE_HDFS_STATS_LOG_FILE,             ""),
   DDint__(HIVE_LIB_HDFS_PORT_OVERRIDE,          "-1"),
@@ -6390,6 +6391,7 @@ DefaultToken NADefaults::token(Int32 attrEnum,
   else {
     if ((attrEnum == TERMINAL_CHARSET) ||
         (attrEnum == USE_HIVE_SOURCE) ||
+        (attrEnum == HIVE_FILE_CHARSET) ||
         (attrEnum == HBASE_DATA_BLOCK_ENCODING_OPTION) ||
         (attrEnum == HBASE_COMPRESSION_OPTION))
       return DF_USER;


[4/9] incubator-trafodion git commit: fix more according to jira 1720 review comments

Posted by hz...@apache.org.
Fix more issues according to the JIRA 1720 review comments


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/82b256c0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/82b256c0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/82b256c0

Branch: refs/heads/master
Commit: 82b256c00859ae80fb83351c940bddb12ce1e15b
Parents: 3efa731
Author: Cloud User <ce...@ming01.novalocal>
Authored: Wed Jan 13 13:54:21 2016 +0000
Committer: Cloud User <ce...@ming01.novalocal>
Committed: Wed Jan 13 13:54:21 2016 +0000

----------------------------------------------------------------------
 core/sql/common/csconvert.cpp    | 12 +++---------
 core/sql/exp/exp_conv.cpp        | 17 ++++++-----------
 core/sql/optimizer/SynthType.cpp | 10 ++++++++--
 core/sql/sqlcomp/nadefaults.cpp  |  2 +-
 4 files changed, 18 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index a18b1c3..438ee8b 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1279,8 +1279,9 @@ char * findStartOfChar( char *someByteInChar, char *startOfBuffer )
      rtnv-- ;
   return rtnv ;
 }
+
 /* A method to do character set conversion , using Glibc iconv */
-int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen)
+static int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen)
 {
   iconv_t cd;
   int rc;
@@ -1298,23 +1299,16 @@ int code_convert(const char *from_charset,const char *to_charset,char *inbuf, si
   iconv_close(cd);
   return outlen;
 }
-/* from gbk to utf8 */
-int gbk2utf8(char *inbuf,size_t inlen,char *outbuf,size_t outlen)
-{
-  return code_convert("gbk","utf-8",inbuf,inlen,outbuf,outlen);
-}
 
 /* convert gbk string into UTF8 */
 int gbkToUtf8(char* gbkString, size_t gbklen, 
               char* result ,size_t outlen, bool addNullAtEnd)
 {
    int originalOutlen = outlen;
-   int finalLength = gbk2utf8 ( gbkString, gbklen,  result, outlen);
+   int finalLength = code_convert( "gbk","utf-8", gbkString, gbklen,  result, outlen);
    
    if (finalLength == -1 ) return 0;
    
-   //the result is allocated with lenght originalOutlen + 1
-   //so no overrun is possible
    if ( addNullAtEnd )
    {
       if(originalOutlen >= finalLength )

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/exp/exp_conv.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp
index 319727f..b2a11cd 100644
--- a/core/sql/exp/exp_conv.cpp
+++ b/core/sql/exp/exp_conv.cpp
@@ -9322,18 +9322,18 @@ convDoIt(char * source,
   break;
 
 // gb2312 -> utf8
+// JIRA 1720
   case CONV_GBK_F_UTF8_V:
   {
-    char * targetbuf = new char[sourceLen*4+1];
-    size_t sl = sourceLen;
-    int convLen = gbkToUtf8( source, sl, targetbuf, sl*4);
     int copyLen = 0;
+    int convLen = gbkToUtf8( source, sourceLen, target, targetLen);
     if (convLen > 0) {
-      copyLen = (convLen< targetLen) ? convLen: targetLen;
-      str_cpy_all(target, targetbuf, copyLen);
+      copyLen = convLen; 
       //if the target length is not enough, instead of truncate, raise a SQL Error
       if (convLen > targetLen)
-         ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW);
+        ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW);
+      if ( varCharLen )
+        setVCLength(varCharLen, varCharLenSize, copyLen);
     }
     else {
       // LCOV_EXCL_START
@@ -9341,11 +9341,6 @@ convDoIt(char * source,
       copyLen = 0;
       // LCOV_EXCL_STOP
     }
-
-    if ( varCharLen )
-       setVCLength(varCharLen, varCharLenSize, copyLen);
-    delete targetbuf;
-
   };
   break;
 // 5/10/98: sjis -> unicode

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/optimizer/SynthType.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/SynthType.cpp b/core/sql/optimizer/SynthType.cpp
index 56e341f..7d1c8a9 100644
--- a/core/sql/optimizer/SynthType.cpp
+++ b/core/sql/optimizer/SynthType.cpp
@@ -5208,11 +5208,17 @@ const NAType *Translate::synthesizeType()
        break;
 
      case GBK_TO_UTF8:
-       if (translateSource->getCharSet() == CharInfo::GBK || translateSource->getCharSet() == CharInfo::UnknownCharSet )
+       if (translateSource->getCharSet() == CharInfo::GBK )
          charsetTarget = CharInfo::UTF8;
        else
        {
-            if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) ==  CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) )
+           /* this is a solution to support GBK before Trafodion can support GBK in total
+            * see jira 1720 for more details
+            * the logic here is:
+            * when HIVE_FILE_CHARSET is not empty, it means the real charset in Hive table is not same as HIVE_DEFAULT_CHARSET
+            * in this case, allow the converting , ignoring the source charset checking above
+            */
+            if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) ==  "" ) //CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) )
               err4106arg = SQLCHARSETCODE_GB2312;
             else
              charsetTarget = CharInfo::UTF8;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp
index cfe2cf3..20152d6 100644
--- a/core/sql/sqlcomp/nadefaults.cpp
+++ b/core/sql/sqlcomp/nadefaults.cpp
@@ -1958,7 +1958,7 @@ SDDkwd__(EXE_DIAGNOSTIC_EVENTS,		"OFF"),
 
   DDkwd__(HIVE_DEFAULT_CHARSET,            (char *)SQLCHARSETSTRING_UTF8),
   DD_____(HIVE_DEFAULT_SCHEMA,                  "HIVE"),
-  DD_____(HIVE_FILE_CHARSET,            (char *)SQLCHARSETSTRING_UTF8),
+  DD_____(HIVE_FILE_CHARSET,                    ""),
   DD_____(HIVE_FILE_NAME,     "/hive/tpcds/customer/customer.dat" ),
   DD_____(HIVE_HDFS_STATS_LOG_FILE,             ""),
   DDint__(HIVE_LIB_HDFS_PORT_OVERRIDE,          "-1"),


[2/9] incubator-trafodion git commit: fix the pull-request comments for jira1720

Posted by hz...@apache.org.
Address the pull-request comments for JIRA 1720


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/12a0c3e4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/12a0c3e4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/12a0c3e4

Branch: refs/heads/master
Commit: 12a0c3e4006d9c2b2cf9e62f23121b27ddee17c9
Parents: d51d201
Author: Cloud User <ce...@ming01.novalocal>
Authored: Tue Jan 12 14:43:03 2016 +0000
Committer: Cloud User <ce...@ming01.novalocal>
Committed: Tue Jan 12 14:43:03 2016 +0000

----------------------------------------------------------------------
 core/sql/common/csconvert.cpp | 10 ++++++++--
 core/sql/common/csconvert.h   |  2 +-
 core/sql/exp/exp_conv.cpp     |  5 +++--
 3 files changed, 12 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/12a0c3e4/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index b0d21af..f94ec2a 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1304,16 +1304,22 @@ int gbk2utf8(char *inbuf,size_t inlen,char *outbuf,size_t outlen)
   return code_convert("gbk","utf-8",inbuf,inlen,outbuf,outlen);
 }
 
+/* convert gbk string into UTF8 */
 int gbkToUtf8(char* gbkString, size_t gbklen, 
               char* result ,size_t outlen, int addNullAtEnd)
 {
-
+   int originalOutlen = outlen;
    int finalLength = gbk2utf8 ( gbkString, gbklen,  result, outlen);
    
    if (finalLength == -1 ) return 0;
    
+   //the result is allocated with lenght originalOutlen + 1
+   //so no overrun is possible
    if ( addNullAtEnd > 0 )
-      result[finalLength] = 0;
+   {
+      if(originalOutlen >= finalLength )
+        result[finalLength] = 0;
+   }
 
    return finalLength;
 }

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/12a0c3e4/core/sql/common/csconvert.h
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.h b/core/sql/common/csconvert.h
index 57fec71..810fae5 100644
--- a/core/sql/common/csconvert.h
+++ b/core/sql/common/csconvert.h
@@ -108,7 +108,7 @@ int  UTF16ToLocale( const enum cnv_version version,
 
 NA_EIDPROC
 int gbkToUtf8(char* gbkString, size_t gbklen,
-              char* result ,size_t outlen, int addNullAtEnd=FALSE);
+              char* result ,size_t outlen, bool addNullAtEnd=FALSE);
 
 /*
  * LocaleCharToUCS4() converts the FIRST char in the input string to its

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/12a0c3e4/core/sql/exp/exp_conv.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp
index 015ec0f..319727f 100644
--- a/core/sql/exp/exp_conv.cpp
+++ b/core/sql/exp/exp_conv.cpp
@@ -9331,8 +9331,9 @@ convDoIt(char * source,
     if (convLen > 0) {
       copyLen = (convLen< targetLen) ? convLen: targetLen;
       str_cpy_all(target, targetbuf, copyLen);
-    //  if (convLen > targetLen)
-
+      //if the target length is not enough, instead of truncate, raise a SQL Error
+      if (convLen > targetLen)
+         ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW);
     }
     else {
       // LCOV_EXCL_START


[5/9] incubator-trafodion git commit: refine the iconv function, remove unnecessary code to boost perf further

Posted by hz...@apache.org.
Refine the iconv function and remove unnecessary code to further boost performance


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/aecc2db1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/aecc2db1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/aecc2db1

Branch: refs/heads/master
Commit: aecc2db1d76b1f030fc115060a2ad27de4802f65
Parents: 82b256c
Author: Liu Ming <mi...@esgyn.cn>
Authored: Wed Jan 13 14:09:26 2016 +0000
Committer: Liu Ming <mi...@esgyn.cn>
Committed: Wed Jan 13 14:09:26 2016 +0000

----------------------------------------------------------------------
 core/sql/common/csconvert.cpp | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/aecc2db1/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index 438ee8b..2423976 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1281,21 +1281,24 @@ char * findStartOfChar( char *someByteInChar, char *startOfBuffer )
 }
 
 /* A method to do character set conversion , using Glibc iconv */
-static int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen)
+static int charsetConvert(const char *srcCharset,const char *targetCharset,char *inputbuf, size_t inputlen, char *outbuf,size_t outlen)
 {
+  char **ptrin = &inputbuf;
+  char **ptrout = &outbuf;
+
   iconv_t cd;
-  int rc;
-  char **pin = &inbuf;
-  char **pout = &outbuf;
-
-  cd = iconv_open(to_charset,from_charset);
-  if (cd==0) return -1;
-  memset(outbuf,0,outlen);
-  if (iconv(cd,pin,(size_t*)&inlen,pout,(size_t *)&outlen)==-1) 
+  cd = iconv_open(targetCharset,srcCharset);
+
+  if (cd==0) 
+    return -1;
+
+  if (iconv(cd,ptrin,(size_t*)&inputlen,ptrout,(size_t *)&outlen) == -1) 
   {
+    //error occurs
     iconv_close(cd);
     return -1;
   }
+
   iconv_close(cd);
   return outlen;
 }
@@ -1305,14 +1308,15 @@ int gbkToUtf8(char* gbkString, size_t gbklen,
               char* result ,size_t outlen, bool addNullAtEnd)
 {
    int originalOutlen = outlen;
-   int finalLength = code_convert( "gbk","utf-8", gbkString, gbklen,  result, outlen);
+   int finalLength = charsetConvert( "gbk","utf-8", gbkString, gbklen,  result, outlen);
    
-   if (finalLength == -1 ) return 0;
+   if (finalLength == -1 ) 
+     return 0;
    
    if ( addNullAtEnd )
    {
-      if(originalOutlen >= finalLength )
-        result[finalLength] = 0;
+     if(originalOutlen >= finalLength )
+       result[finalLength] = 0;
    }
 
    return finalLength;


[3/9] incubator-trafodion git commit: fix comments for jira 1720

Posted by hz...@apache.org.
fix comments for jira 1720


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/3efa7313
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/3efa7313
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/3efa7313

Branch: refs/heads/master
Commit: 3efa7313c51b31b4d1a3be28a84a083a63dd0178
Parents: 12a0c3e
Author: Cloud User <ce...@ming01.novalocal>
Authored: Tue Jan 12 15:56:46 2016 +0000
Committer: Cloud User <ce...@ming01.novalocal>
Committed: Tue Jan 12 15:56:46 2016 +0000

----------------------------------------------------------------------
 core/sql/common/csconvert.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/3efa7313/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index f94ec2a..a18b1c3 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1306,7 +1306,7 @@ int gbk2utf8(char *inbuf,size_t inlen,char *outbuf,size_t outlen)
 
 /* convert gbk string into UTF8 */
 int gbkToUtf8(char* gbkString, size_t gbklen, 
-              char* result ,size_t outlen, int addNullAtEnd)
+              char* result ,size_t outlen, bool addNullAtEnd)
 {
    int originalOutlen = outlen;
    int finalLength = gbk2utf8 ( gbkString, gbklen,  result, outlen);
@@ -1315,7 +1315,7 @@ int gbkToUtf8(char* gbkString, size_t gbklen,
    
    //the result is allocated with lenght originalOutlen + 1
    //so no overrun is possible
-   if ( addNullAtEnd > 0 )
+   if ( addNullAtEnd )
    {
       if(originalOutlen >= finalLength )
         result[finalLength] = 0;


[9/9] incubator-trafodion git commit: [TRAFODION-1720] Add support to convert gbk into utf8

Posted by hz...@apache.org.
[TRAFODION-1720] Add support to convert gbk into utf8


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/9cc1e835
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/9cc1e835
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/9cc1e835

Branch: refs/heads/master
Commit: 9cc1e835fde12af5c0ce0d74395876ab48a071cc
Parents: aa3deff e6e88d0
Author: Hans Zeller <hz...@apache.org>
Authored: Mon Feb 1 21:47:43 2016 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Mon Feb 1 21:47:43 2016 +0000

----------------------------------------------------------------------
 core/sql/common/csconvert.cpp            |  48 ++++++++++++++++++++++++++
 core/sql/common/csconvert.h              |   4 +++
 core/sql/exp/exp_clause_derived.h        |   4 ++-
 core/sql/exp/exp_conv.cpp                |  26 ++++++++++++++
 core/sql/generator/GenItemFunc.cpp       |   3 ++
 core/sql/generator/GenRelScan.cpp        |  22 +++++++++---
 core/sql/optimizer/BindItemExpr.cpp      |  36 +++++++++++++++----
 core/sql/optimizer/ItemExpr.cpp          |   2 ++
 core/sql/optimizer/ItemFunc.h            |   1 +
 core/sql/optimizer/SynthType.cpp         |   7 ++++
 core/sql/regress/hive/EXPECTED005        |  26 ++++++++++++++
 core/sql/regress/hive/TEST005            |   7 ++++
 core/sql/regress/hive/TEST005_a.hive.sql |   9 +++++
 core/sql/regress/hive/tbl_gbk.data       | Bin 0 -> 129 bytes
 core/sql/sqlcomp/DefaultConstants.h      |   3 ++
 core/sql/sqlcomp/nadefaults.cpp          |   2 ++
 16 files changed, 188 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9cc1e835/core/sql/generator/GenRelScan.cpp
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9cc1e835/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9cc1e835/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------


[8/9] incubator-trafodion git commit: change more for jira 1720 comments

Posted by hz...@apache.org.
More changes for the JIRA 1720 review comments


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/e6e88d06
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/e6e88d06
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/e6e88d06

Branch: refs/heads/master
Commit: e6e88d0628979982f02d54c094156cf34f19c7a1
Parents: 63ef4bd
Author: Liu Ming <mi...@esgyn.cn>
Authored: Fri Jan 29 14:36:11 2016 +0000
Committer: Liu Ming <mi...@esgyn.cn>
Committed: Fri Jan 29 14:36:11 2016 +0000

----------------------------------------------------------------------
 core/sql/common/csconvert.cpp     | 4 +++-
 core/sql/exp/exp_conv.cpp         | 2 +-
 core/sql/generator/GenRelScan.cpp | 6 +-----
 3 files changed, 5 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/e6e88d06/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index fc4263d..b4bdaac 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1315,8 +1315,10 @@ int gbkToUtf8(char* gbkString, size_t gbklen,
    
    if ( addNullAtEnd )
    {
-     if(originalOutlen >= finalLength )
+     if(originalOutlen > finalLength )
        result[finalLength] = 0;
+     else
+       return -1;
    }
 
    return finalLength;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/e6e88d06/core/sql/exp/exp_conv.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp
index 36f9a30..3a488fe 100644
--- a/core/sql/exp/exp_conv.cpp
+++ b/core/sql/exp/exp_conv.cpp
@@ -9327,7 +9327,7 @@ convDoIt(char * source,
   {
     int copyLen = 0;
     int convLen = gbkToUtf8( source, sourceLen, target, targetLen);
-    if (convLen > 0) {
+    if (convLen >= 0) {
       copyLen = convLen; 
       if ( varCharLen )
         setVCLength(varCharLen, varCharLenSize, copyLen);

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/e6e88d06/core/sql/generator/GenRelScan.cpp
----------------------------------------------------------------------
diff --git a/core/sql/generator/GenRelScan.cpp b/core/sql/generator/GenRelScan.cpp
index 7729b13..eaf46f6 100644
--- a/core/sql/generator/GenRelScan.cpp
+++ b/core/sql/generator/GenRelScan.cpp
@@ -216,7 +216,7 @@ int HbaseAccess::createAsciiColAndCastExpr(Generator * generator,
     }
 
   if (newGivenType->getTypeQualifier() == NA_CHARACTER_TYPE &&
-      CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "GBK")
+      (CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "GBK" || CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "gbk") && CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) == "UTF8" )
         needTranslate = TRUE;
 
   // source ascii row is a varchar where the data is a pointer to the source data
@@ -798,7 +798,6 @@ short FileScan::codeGenForHive(Generator * generator)
   const Int32 executorPredTuppIndex = 3;
   const Int32 asciiTuppIndex = 4;
   ULng32 asciiRowLen; 
-  ULng32 translateRowLen; 
   ExpTupleDesc * asciiTupleDesc = 0;
 
   ex_cri_desc * work_cri_desc = NULL;
@@ -808,7 +807,6 @@ short FileScan::codeGenForHive(Generator * generator)
   ExpTupleDesc::TupleDataFormat asciiRowFormat = ExpTupleDesc::SQLARK_EXPLODED_FORMAT;
   ExpTupleDesc::TupleDataFormat hdfsRowFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT;
   ValueIdList asciiVids;
-  ValueIdList transVids;
   ValueIdList executorPredCastVids;
   ValueIdList projectExprOnlyCastVids;
 
@@ -1515,7 +1513,6 @@ short HbaseAccess::genRowIdExpr(Generator * generator,
 	  int res;
 	  ItemExpr * castVal = NULL;
 	  ItemExpr * asciiVal = NULL;
-	  ItemExpr * transVal = NULL;
 	  res = createAsciiColAndCastExpr(generator,
 					  givenType,
 					  asciiVal, castVal);
@@ -1611,7 +1608,6 @@ short HbaseAccess::genRowIdExprForNonSQ(Generator * generator,
 	  int res;
 	  ItemExpr * castVal = NULL;
 	  ItemExpr * asciiVal = NULL;
-	  ItemExpr * transVal = NULL;
 	  res = createAsciiColAndCastExpr(generator,
 					  givenType,
 					  asciiVal, castVal);


[7/9] incubator-trafodion git commit: fix some comments

Posted by hz...@apache.org.
fix some comments


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/63ef4bdf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/63ef4bdf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/63ef4bdf

Branch: refs/heads/master
Commit: 63ef4bdfc880a5e295747b6cdd06b556807a642c
Parents: 6abfa2a
Author: Liu Ming <mi...@esgyn.cn>
Authored: Thu Jan 28 09:49:03 2016 +0000
Committer: Liu Ming <mi...@esgyn.cn>
Committed: Thu Jan 28 09:49:03 2016 +0000

----------------------------------------------------------------------
 core/sql/optimizer/SynthType.cpp | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/63ef4bdf/core/sql/optimizer/SynthType.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/SynthType.cpp b/core/sql/optimizer/SynthType.cpp
index 5736422..fc0f12d 100644
--- a/core/sql/optimizer/SynthType.cpp
+++ b/core/sql/optimizer/SynthType.cpp
@@ -5211,20 +5211,7 @@ const NAType *Translate::synthesizeType()
        if (translateSource->getCharSet() == CharInfo::GBK )
          charsetTarget = CharInfo::UTF8;
        else
-       {
-           /* this is a solution to support GBK before Trafodion can support GBK in total
-            * see jira 1720 for more details
-            * the logic here is:
-            * when HIVE_FILE_CHARSET is not empty, it means the real charset in Hive table is not same as HIVE_DEFAULT_CHARSET
-            * in this case, allow the converting , ignoring the source charset checking above
-            if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) ==  "" ) 
-              err4106arg = SQLCHARSETCODE_GBK;
-            else
-             charsetTarget = CharInfo::UTF8;
-            */
-		err4106arg = SQLCHARSETSTRING_GBK;
-
-       }
+         err4106arg = SQLCHARSETSTRING_GBK;
        break;
 
      case ISO88591_TO_UTF8:


[6/9] incubator-trafodion git commit: enhancement of fixes for JIRA 1720

Posted by hz...@apache.org.
enhancement of fixes for JIRA 1720


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/6abfa2a6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/6abfa2a6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/6abfa2a6

Branch: refs/heads/master
Commit: 6abfa2a6522376db7d3d5abbc15efdc01334b97d
Parents: aecc2db
Author: Liu Ming <mi...@esgyn.cn>
Authored: Thu Jan 28 09:41:16 2016 +0000
Committer: Liu Ming <mi...@esgyn.cn>
Committed: Thu Jan 28 09:41:16 2016 +0000

----------------------------------------------------------------------
 core/sql/common/csconvert.cpp            |   2 +-
 core/sql/exp/exp_conv.cpp                |   8 ++++--
 core/sql/generator/GenRelScan.cpp        |  26 +++++++++++++++----
 core/sql/optimizer/BindItemExpr.cpp      |  36 +++++++++++++++++++++-----
 core/sql/optimizer/SynthType.cpp         |   8 +++---
 core/sql/regress/hive/EXPECTED005        |  26 +++++++++++++++++++
 core/sql/regress/hive/TEST005            |   7 +++++
 core/sql/regress/hive/TEST005_a.hive.sql |   9 +++++++
 core/sql/regress/hive/tbl_gbk.data       | Bin 0 -> 129 bytes
 9 files changed, 105 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index 2423976..fc4263d 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1311,7 +1311,7 @@ int gbkToUtf8(char* gbkString, size_t gbklen,
    int finalLength = charsetConvert( "gbk","utf-8", gbkString, gbklen,  result, outlen);
    
    if (finalLength == -1 ) 
-     return 0;
+     return -1;
    
    if ( addNullAtEnd )
    {

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/exp/exp_conv.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp
index b2a11cd..36f9a30 100644
--- a/core/sql/exp/exp_conv.cpp
+++ b/core/sql/exp/exp_conv.cpp
@@ -9329,16 +9329,20 @@ convDoIt(char * source,
     int convLen = gbkToUtf8( source, sourceLen, target, targetLen);
     if (convLen > 0) {
       copyLen = convLen; 
+      if ( varCharLen )
+        setVCLength(varCharLen, varCharLenSize, copyLen);
       //if the target length is not enough, instead of truncate, raise a SQL Error
       if (convLen > targetLen)
         ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW);
-      if ( varCharLen )
-        setVCLength(varCharLen, varCharLenSize, copyLen);
     }
     else {
       // LCOV_EXCL_START
       convLen = 0;
       copyLen = 0;
+      if ( varCharLen )
+        setVCLength(varCharLen, varCharLenSize, copyLen);
+      ExRaiseSqlError(heap, diagsArea, EXE_CONVERT_STRING_ERROR);
+      return ex_expr::EXPR_ERROR;
       // LCOV_EXCL_STOP
     }
   };

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/generator/GenRelScan.cpp
----------------------------------------------------------------------
diff --git a/core/sql/generator/GenRelScan.cpp b/core/sql/generator/GenRelScan.cpp
index 573873f..7729b13 100644
--- a/core/sql/generator/GenRelScan.cpp
+++ b/core/sql/generator/GenRelScan.cpp
@@ -202,6 +202,7 @@ int HbaseAccess::createAsciiColAndCastExpr(Generator * generator,
   asciiValue = NULL;
   castValue = NULL;
   CollHeap * h = generator->wHeap();
+  bool needTranslate = FALSE;
 
   // if this is an upshifted datatype, remove the upshift attr.
   // We dont want to upshift data during retrievals or while building keys.
@@ -214,22 +215,32 @@ int HbaseAccess::createAsciiColAndCastExpr(Generator * generator,
       ((CharType*)newGivenType)->setUpshifted(FALSE);
     }
 
+  if (newGivenType->getTypeQualifier() == NA_CHARACTER_TYPE &&
+      CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "GBK")
+        needTranslate = TRUE;
+
   // source ascii row is a varchar where the data is a pointer to the source data
   // in the hdfs buffer.
   NAType *asciiType = NULL;
   
   if (DFS2REC::isDoubleCharacter(newGivenType->getFSDatatype()))
-    asciiType =  new (h) SQLVarChar(sizeof(Int64)/2, newGivenType->supportsSQLnull(),
+  {
+      asciiType =  new (h) SQLVarChar(sizeof(Int64)/2, newGivenType->supportsSQLnull(),
 				    FALSE, FALSE, newGivenType->getCharSet());
+  }
+  // set the source charset to GBK if HIVE_FILE_CHARSET is set
+  // HIVE_FILE_CHARSET can only be empty or GBK
+  else if (  needTranslate == TRUE )
+  {
+      asciiType =  new (h) SQLVarChar(sizeof(Int64)/2, newGivenType->supportsSQLnull(),
+                                      FALSE, FALSE, CharInfo::GBK);
+  }
   else
     asciiType = new (h) SQLVarChar(sizeof(Int64), newGivenType->supportsSQLnull());
-
   if (asciiType)
     {
       asciiValue = new (h) NATypeToItem(asciiType->newCopy(h));
-
-      castValue = new(h) Cast(asciiValue, newGivenType); 
-
+      castValue = new(h) Cast(asciiValue, newGivenType);
       if (castValue)
 	{
 	  ((Cast*)castValue)->setSrcIsVarcharPtr(TRUE);
@@ -787,6 +798,7 @@ short FileScan::codeGenForHive(Generator * generator)
   const Int32 executorPredTuppIndex = 3;
   const Int32 asciiTuppIndex = 4;
   ULng32 asciiRowLen; 
+  ULng32 translateRowLen; 
   ExpTupleDesc * asciiTupleDesc = 0;
 
   ex_cri_desc * work_cri_desc = NULL;
@@ -796,6 +808,7 @@ short FileScan::codeGenForHive(Generator * generator)
   ExpTupleDesc::TupleDataFormat asciiRowFormat = ExpTupleDesc::SQLARK_EXPLODED_FORMAT;
   ExpTupleDesc::TupleDataFormat hdfsRowFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT;
   ValueIdList asciiVids;
+  ValueIdList transVids;
   ValueIdList executorPredCastVids;
   ValueIdList projectExprOnlyCastVids;
 
@@ -840,6 +853,7 @@ short FileScan::codeGenForHive(Generator * generator)
     asciiVids.insert(asciiValue->getValueId());
       
     castValue->bindNode(generator->getBindWA());
+
     if (convertSkipList[ii] == 1 || convertSkipList[ii] == 2)
       executorPredCastVids.insert(castValue->getValueId());
     else
@@ -1501,6 +1515,7 @@ short HbaseAccess::genRowIdExpr(Generator * generator,
 	  int res;
 	  ItemExpr * castVal = NULL;
 	  ItemExpr * asciiVal = NULL;
+	  ItemExpr * transVal = NULL;
 	  res = createAsciiColAndCastExpr(generator,
 					  givenType,
 					  asciiVal, castVal);
@@ -1596,6 +1611,7 @@ short HbaseAccess::genRowIdExprForNonSQ(Generator * generator,
 	  int res;
 	  ItemExpr * castVal = NULL;
 	  ItemExpr * asciiVal = NULL;
+	  ItemExpr * transVal = NULL;
 	  res = createAsciiColAndCastExpr(generator,
 					  givenType,
 					  asciiVal, castVal);

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/optimizer/BindItemExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/BindItemExpr.cpp b/core/sql/optimizer/BindItemExpr.cpp
index da39397..3fd854e 100644
--- a/core/sql/optimizer/BindItemExpr.cpp
+++ b/core/sql/optimizer/BindItemExpr.cpp
@@ -1497,10 +1497,10 @@ ItemExpr* Assign::tryToRelaxCharTypeMatchRules(BindWA *bindWA)
 ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
 {
   ItemExpr *result = this;
-  enum {iUCS2 = 0, iISO = 1, iUTF8 = 2, iSJIS = 3, iUNK = 4};
-  Int32 Literals_involved[5] = { 0, 0, 0, 0, 0 };
-  Int32 nonLiterals_involved[5] = { 0, 0, 0, 0, 0 };
-  Int32 charsets_involved[5] = { 0, 0, 0, 0, 0 };
+  enum {iUCS2 = 0, iISO = 1, iUTF8 = 2, iSJIS = 3, iGBK = 4, iUNK = 5};
+  Int32 Literals_involved[6] = { 0, 0, 0, 0, 0, 0};
+  Int32 nonLiterals_involved[6] = { 0, 0, 0, 0, 0, 0 };
+  Int32 charsets_involved[6] = { 0, 0, 0, 0, 0, 0 };
   Int32 charsetsCount = 0;
   CharInfo::CharSet cs          = CharInfo::UnknownCharSet;
   CharInfo::CharSet curr_chld_cs= CharInfo::UnknownCharSet;
@@ -1545,6 +1545,10 @@ ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
           cur_chld_cs_ndx = iSJIS;
           break;
 
+        case CharInfo::GBK:
+          cur_chld_cs_ndx = iGBK;
+          break;
+
         //case CharInfo::KANJI_MP:
         //case CharInfo::KSC5601_MP:
         default:
@@ -1593,6 +1597,8 @@ ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
           cs = CharInfo::UTF8;
        else if ( Literals_involved[iSJIS] > 0 )
           cs = CharInfo::SJIS;
+       else if ( Literals_involved[iGBK] > 0 )
+          cs = CharInfo::GBK;
 
        //
        // Now, we may be able to optimize by translating the 1st child
@@ -1601,7 +1607,7 @@ ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
        //
        if ( ( cs == chld0_cs ) &&  ( arity == 2 ) &&
                ( curr_chld_opType != ITM_TRANSLATE ) &&
-               ( charsetsCount == (charsets_involved[iUCS2] + charsets_involved[iUTF8]) ) )
+               ( charsetsCount == (charsets_involved[iUCS2] + charsets_involved[iUTF8] + charsets_involved[iGBK]) ) )
        {
           if ( chld0_opType == ITM_TRANSLATE )
              cs = curr_chld_cs;  //...because we will eliminate a translate op
@@ -1631,7 +1637,22 @@ ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
         if ( desiredType->getTypeQualifier() == NA_CHARACTER_TYPE )
         {
            CharInfo::CharSet Desired_cs = ((const CharType*)desiredType)->getCharSet();
-           if ( (chld_cs != Desired_cs) && ( ! ((Cast *)this)->tgtCharSetSpecified() ) )
+           /*
+           * Special handling for JIRA 1720, used only in the bulk-load scenario.
+           * When the user sets HIVE_FILE_CHARSET to 'GBK', the data stored in the Hive
+           * table is encoded as GBK. Trafodion treats all Hive data as 'UTF8' by default, so
+           * this check allows automatic charset conversion to happen during bulk load.
+           * The reason is:
+           * the Hive scan marks the source column as GBK when HIVE_FILE_CHARSET is set to GBK
+           * (the only non-empty value it can have).
+           * The binder then invokes this implicit-casting method to check whether an automatic
+           * charset conversion is needed.
+           * The Hive scan does not set the tgtCharSetSpecified field, so to
+           * force a translate to be performed, an extra check is added here.
+           */
+           if( (chld_cs != Desired_cs) && CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "GBK" )
+              result = performImplicitCasting( Desired_cs, bindWA );
+           else if ( (chld_cs != Desired_cs) && ( ! ((Cast *)this)->tgtCharSetSpecified() ) )
            {
               //
               // Looks like user said CAST( ... as [var]char(NNN) ) 
@@ -1696,6 +1717,9 @@ ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
         case Translate::UCS2_TO_UTF8:
              Required_cs = CharInfo::UNICODE;
              break;
+	case Translate::GBK_TO_UTF8:
+	     Required_cs = CharInfo::GBK;
+             break;
         default:
              break;
      }

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/optimizer/SynthType.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/SynthType.cpp b/core/sql/optimizer/SynthType.cpp
index 7d1c8a9..5736422 100644
--- a/core/sql/optimizer/SynthType.cpp
+++ b/core/sql/optimizer/SynthType.cpp
@@ -5217,11 +5217,13 @@ const NAType *Translate::synthesizeType()
             * the logic here is:
             * when HIVE_FILE_CHARSET is not empty, it means the real charset in Hive table is not same as HIVE_DEFAULT_CHARSET
             * in this case, allow the converting , ignoring the source charset checking above
-            */
-            if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) ==  "" ) //CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) )
-              err4106arg = SQLCHARSETCODE_GB2312;
+            if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) ==  "" ) 
+              err4106arg = SQLCHARSETCODE_GBK;
             else
              charsetTarget = CharInfo::UTF8;
+            */
+		err4106arg = SQLCHARSETSTRING_GBK;
+
        }
        break;
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/regress/hive/EXPECTED005
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/EXPECTED005 b/core/sql/regress/hive/EXPECTED005
index 49d8a62..9299d25 100644
--- a/core/sql/regress/hive/EXPECTED005
+++ b/core/sql/regress/hive/EXPECTED005
@@ -447,4 +447,30 @@ TINT    SM      I            BIG                   STR                        F
 
 --- 1 row(s) selected.
 >>
+>>cqd HIVE_FILE_CHARSET 'GBK';
+
+--- SQL operation complete.
+>>select c1, CONVERTTOHEX(c2) from tbl_gbk;
+
+C1           (EXPR)
+-----------  --------------------------------------------------
+
+          3  EC8B90EC978E
+          5  EC8B90EC978E
+          2  EC8B90EC978E
+          4  EC8B90EC978E
+          6  EC8B90EC978E
+          7  EC8B90EC978E
+          8  EC8B90EC978E
+          3  ECBB93EB9F8FECAB97EB9B91
+          2  ECBB93EB9F8FECAB97EB9B91
+          6  ECBB93EB9F8FECAB97EB9B91
+         19  ECBB93EB9F8FECAB97EB9B91
+          8  ECBB93EB9F8FECAB97EB9B91
+
+--- 12 row(s) selected.
+>>cqd HIVE_FILE_CHARSET reset;
+
+--- SQL operation complete.
+>>
 >>log;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/regress/hive/TEST005
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/TEST005 b/core/sql/regress/hive/TEST005
index b8f7518..5bc2a21 100644
--- a/core/sql/regress/hive/TEST005
+++ b/core/sql/regress/hive/TEST005
@@ -30,16 +30,19 @@ sh regrhadoop.ksh fs -mkdir  /user/hive/exttables/customer_ddl;
 sh regrhadoop.ksh fs -mkdir  /user/hive/exttables/customer_temp;
 sh regrhadoop.ksh fs -mkdir  /user/hive/exttables/tbl_utf8;
 sh regrhadoop.ksh fs -mkdir  /user/hive/exttables/tbl_type;
+sh regrhadoop.ksh fs -mkdir  /user/hive/exttables/tbl_gbk;
 --empty folders
 sh regrhadoop.ksh fs -rm   /user/hive/exttables/customer_ddl/*;
 sh regrhadoop.ksh fs -rm   /user/hive/exttables/customer_temp/*;
 sh regrhadoop.ksh fs -rm   /user/hive/exttables/tbl_utf8/*;
 sh regrhadoop.ksh fs -rm   /user/hive/exttables/tbl_type/*;
+sh regrhadoop.ksh fs -rm   /user/hive/exttables/tbl_gbk/*;
 
 --- setup Hive tables
 sh regrhive.ksh -v -f $REGRTSTDIR/TEST005_a.hive.sql;
 sh regrhadoop.ksh fs -put $REGRTSTDIR/tbl_utf8.data /user/hive/exttables/tbl_utf8;
 sh regrhadoop.ksh fs -put $REGRTSTDIR/tbl_type.data /user/hive/exttables/tbl_type;
+sh regrhadoop.ksh fs -put $REGRTSTDIR/tbl_gbk.data /user/hive/exttables/tbl_gbk;
 
 log LOG005 clear;
 
@@ -219,4 +222,8 @@ select * from tbl_type;
 insert into tbl_type_temp select * from tbl_type;
 select * from tbl_type_temp;
 
+cqd HIVE_FILE_CHARSET 'GBK';
+select c1, CONVERTTOHEX(c2) from tbl_gbk;
+cqd HIVE_FILE_CHARSET reset;
+
 log;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/regress/hive/TEST005_a.hive.sql
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/TEST005_a.hive.sql b/core/sql/regress/hive/TEST005_a.hive.sql
index ab4098f..1b5c580 100644
--- a/core/sql/regress/hive/TEST005_a.hive.sql
+++ b/core/sql/regress/hive/TEST005_a.hive.sql
@@ -154,3 +154,12 @@ create table tbl_type_temp
      t           timestamp
 )
 row format delimited fields terminated by '|';
+
+drop table tbl_gbk;
+create external table tbl_gbk
+(
+    c1           int,
+    c2           string
+)
+row format delimited fields terminated by '\t'
+location '/user/hive/exttables/tbl_gbk';

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/regress/hive/tbl_gbk.data
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/tbl_gbk.data b/core/sql/regress/hive/tbl_gbk.data
new file mode 100644
index 0000000..2fa331b
Binary files /dev/null and b/core/sql/regress/hive/tbl_gbk.data differ