You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by hz...@apache.org on 2016/02/01 22:49:33 UTC
[1/9] incubator-trafodion git commit: for jira 1720,
add support to convert gbk into utf8
Repository: incubator-trafodion
Updated Branches:
refs/heads/master aa3deffb1 -> 9cc1e835f
for jira 1720, add support to convert gbk into utf8
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/d51d2016
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/d51d2016
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/d51d2016
Branch: refs/heads/master
Commit: d51d2016d29725e3d863b351b3a548ee1b325c0c
Parents: 141f354
Author: Cloud User <ce...@ming01.novalocal>
Authored: Sat Jan 9 11:43:21 2016 +0000
Committer: Cloud User <ce...@ming01.novalocal>
Committed: Sat Jan 9 11:43:21 2016 +0000
----------------------------------------------------------------------
core/sql/common/csconvert.cpp | 42 ++++++++++++++++++++++++++++++++
core/sql/common/csconvert.h | 4 +++
core/sql/exp/exp_clause_derived.h | 4 ++-
core/sql/exp/exp_conv.cpp | 26 ++++++++++++++++++++
core/sql/generator/GenItemFunc.cpp | 3 +++
core/sql/optimizer/ItemExpr.cpp | 2 ++
core/sql/optimizer/ItemFunc.h | 1 +
core/sql/optimizer/SynthType.cpp | 12 +++++++++
core/sql/sqlcomp/DefaultConstants.h | 3 +++
core/sql/sqlcomp/nadefaults.cpp | 2 ++
10 files changed, 98 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index ffc5370..b0d21af 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -30,6 +30,10 @@
// but also used by the ODBC build and maybe others.
#include <limits.h>
+#include <iconv.h>
+#include <stdio.h>
+#include <stdlib.h>
+
#include "multi-byte.h"
#include "fcconv.h"
#include "csconvert.h"
@@ -1275,3 +1279,41 @@ char * findStartOfChar( char *someByteInChar, char *startOfBuffer )
rtnv-- ;
return rtnv ;
}
+/* A method to do character set conversion , using Glibc iconv */
+int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen)
+{
+ iconv_t cd;
+ int rc;
+ char **pin = &inbuf;
+ char **pout = &outbuf;
+
+ cd = iconv_open(to_charset,from_charset);
+ if (cd==0) return -1;
+ memset(outbuf,0,outlen);
+ if (iconv(cd,pin,(size_t*)&inlen,pout,(size_t *)&outlen)==-1)
+ {
+ iconv_close(cd);
+ return -1;
+ }
+ iconv_close(cd);
+ return outlen;
+}
+/* from gbk to utf8 */
+int gbk2utf8(char *inbuf,size_t inlen,char *outbuf,size_t outlen)
+{
+ return code_convert("gbk","utf-8",inbuf,inlen,outbuf,outlen);
+}
+
+int gbkToUtf8(char* gbkString, size_t gbklen,
+ char* result ,size_t outlen, int addNullAtEnd)
+{
+
+ int finalLength = gbk2utf8 ( gbkString, gbklen, result, outlen);
+
+ if (finalLength == -1 ) return 0;
+
+ if ( addNullAtEnd > 0 )
+ result[finalLength] = 0;
+
+ return finalLength;
+}
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/common/csconvert.h
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.h b/core/sql/common/csconvert.h
index 56c9603..57fec71 100644
--- a/core/sql/common/csconvert.h
+++ b/core/sql/common/csconvert.h
@@ -106,6 +106,10 @@ int UTF16ToLocale( const enum cnv_version version,
unsigned int * translated_char_cnt_p = NULL ,
const char *substitution_char = NULL );
+NA_EIDPROC
+int gbkToUtf8(char* gbkString, size_t gbklen,
+ char* result ,size_t outlen, int addNullAtEnd=FALSE);
+
/*
* LocaleCharToUCS4() converts the FIRST char in the input string to its
* UCS4 value. Returns the UCS4 value at location specified AND the
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/exp/exp_clause_derived.h
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_clause_derived.h b/core/sql/exp/exp_clause_derived.h
index fc6ecfe..4558431 100644
--- a/core/sql/exp/exp_clause_derived.h
+++ b/core/sql/exp/exp_clause_derived.h
@@ -1542,7 +1542,9 @@ enum conv_case_index {
CONV_UTF8_F_UCS2_V =248,
CONV_BLOB_BLOB =249,
- CONV_BLOB_ASCII_F =250
+ CONV_BLOB_ASCII_F =250,
+
+ CONV_GBK_F_UTF8_V =251
};
class SQLEXP_LIB_FUNC ex_conv_clause : public ex_clause {
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/exp/exp_conv.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp
index 8c31efc..015ec0f 100644
--- a/core/sql/exp/exp_conv.cpp
+++ b/core/sql/exp/exp_conv.cpp
@@ -9321,6 +9321,32 @@ convDoIt(char * source,
};
break;
+// gb2312 -> utf8
+ case CONV_GBK_F_UTF8_V:
+ {
+ char * targetbuf = new char[sourceLen*4+1];
+ size_t sl = sourceLen;
+ int convLen = gbkToUtf8( source, sl, targetbuf, sl*4);
+ int copyLen = 0;
+ if (convLen > 0) {
+ copyLen = (convLen< targetLen) ? convLen: targetLen;
+ str_cpy_all(target, targetbuf, copyLen);
+ // if (convLen > targetLen)
+
+ }
+ else {
+ // LCOV_EXCL_START
+ convLen = 0;
+ copyLen = 0;
+ // LCOV_EXCL_STOP
+ }
+
+ if ( varCharLen )
+ setVCLength(varCharLen, varCharLenSize, copyLen);
+ delete targetbuf;
+
+ };
+ break;
// 5/10/98: sjis -> unicode
case CONV_SJIS_F_UNICODE_F:
case CONV_SJIS_F_UNICODE_V:
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/generator/GenItemFunc.cpp
----------------------------------------------------------------------
diff --git a/core/sql/generator/GenItemFunc.cpp b/core/sql/generator/GenItemFunc.cpp
index c7e6748..8c8f7e6 100644
--- a/core/sql/generator/GenItemFunc.cpp
+++ b/core/sql/generator/GenItemFunc.cpp
@@ -1737,6 +1737,9 @@ short Translate::codeGen(Generator * generator)
case UCS2_TO_UTF8:
convType = CONV_UCS2_F_UTF8_V;
break;
+ case GBK_TO_UTF8:
+ convType = CONV_GBK_F_UTF8_V;
+ break;
case UNICODE_TO_ISO88591:
convType = CONV_UNICODE_F_ASCII_V;
break;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/ItemExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ItemExpr.cpp b/core/sql/optimizer/ItemExpr.cpp
index 41b6b76..7df1a7e 100644
--- a/core/sql/optimizer/ItemExpr.cpp
+++ b/core/sql/optimizer/ItemExpr.cpp
@@ -13408,6 +13408,8 @@ Translate::Translate(ItemExpr *valPtr, NAString* map_table_name)
map_table_id_ = Translate::SJIS_TO_UTF8;
else if ( _strcmpi(map_table_name->data(), "UTF8TOSJIS") == 0 )
map_table_id_ = Translate::UTF8_TO_SJIS;
+ else if ( _strcmpi(map_table_name->data(), "GBKTOUTF8") == 0 )
+ map_table_id_ = Translate::GBK_TO_UTF8;
else
if ( _strcmpi(map_table_name->data(), "KANJITOISO88591") == 0 )
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/ItemFunc.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ItemFunc.h b/core/sql/optimizer/ItemFunc.h
index 3bb7238..d794a3d 100644
--- a/core/sql/optimizer/ItemFunc.h
+++ b/core/sql/optimizer/ItemFunc.h
@@ -2112,6 +2112,7 @@ public:
UTF8_TO_SJIS, SJIS_TO_UTF8, UTF8_TO_ISO88591,
ISO88591_TO_UTF8,
KANJI_MP_TO_ISO88591, KSC5601_MP_TO_ISO88591,
+ GBK_TO_UTF8,
UNKNOWN_TRANSLATION};
Translate(ItemExpr *valPtr, NAString* map_table_name);
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/SynthType.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/SynthType.cpp b/core/sql/optimizer/SynthType.cpp
index b9d5518..56e341f 100644
--- a/core/sql/optimizer/SynthType.cpp
+++ b/core/sql/optimizer/SynthType.cpp
@@ -5207,6 +5207,18 @@ const NAType *Translate::synthesizeType()
err4106arg = SQLCHARSETSTRING_UTF8;
break;
+ case GBK_TO_UTF8:
+ if (translateSource->getCharSet() == CharInfo::GBK || translateSource->getCharSet() == CharInfo::UnknownCharSet )
+ charsetTarget = CharInfo::UTF8;
+ else
+ {
+ if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) )
+ err4106arg = SQLCHARSETCODE_GB2312;
+ else
+ charsetTarget = CharInfo::UTF8;
+ }
+ break;
+
case ISO88591_TO_UTF8:
if (translateSource->getCharSet() == CharInfo::ISO88591)
{
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/DefaultConstants.h b/core/sql/sqlcomp/DefaultConstants.h
index 2778197..d10acc8 100644
--- a/core/sql/sqlcomp/DefaultConstants.h
+++ b/core/sql/sqlcomp/DefaultConstants.h
@@ -3785,6 +3785,9 @@ enum DefaultConstants
// set to ON to aggressively allocate ESP per core
AGGRESSIVE_ESP_ALLOCATION_PER_CORE,
+ // real charset in the HIVE table
+ HIVE_FILE_CHARSET,
+
// This enum constant must be the LAST one in the list; it's a count,
// not an Attribute (it's not IN DefaultDefaults; it's the SIZE of it)!
__NUM_DEFAULT_ATTRIBUTES
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp
index f6d5604..cfe2cf3 100644
--- a/core/sql/sqlcomp/nadefaults.cpp
+++ b/core/sql/sqlcomp/nadefaults.cpp
@@ -1958,6 +1958,7 @@ SDDkwd__(EXE_DIAGNOSTIC_EVENTS, "OFF"),
DDkwd__(HIVE_DEFAULT_CHARSET, (char *)SQLCHARSETSTRING_UTF8),
DD_____(HIVE_DEFAULT_SCHEMA, "HIVE"),
+ DD_____(HIVE_FILE_CHARSET, (char *)SQLCHARSETSTRING_UTF8),
DD_____(HIVE_FILE_NAME, "/hive/tpcds/customer/customer.dat" ),
DD_____(HIVE_HDFS_STATS_LOG_FILE, ""),
DDint__(HIVE_LIB_HDFS_PORT_OVERRIDE, "-1"),
@@ -6390,6 +6391,7 @@ DefaultToken NADefaults::token(Int32 attrEnum,
else {
if ((attrEnum == TERMINAL_CHARSET) ||
(attrEnum == USE_HIVE_SOURCE) ||
+ (attrEnum == HIVE_FILE_CHARSET) ||
(attrEnum == HBASE_DATA_BLOCK_ENCODING_OPTION) ||
(attrEnum == HBASE_COMPRESSION_OPTION))
return DF_USER;
[4/9] incubator-trafodion git commit: fix more according to jira 1720
review comments
Posted by hz...@apache.org.
fix more according to jira 1720 review comments
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/82b256c0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/82b256c0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/82b256c0
Branch: refs/heads/master
Commit: 82b256c00859ae80fb83351c940bddb12ce1e15b
Parents: 3efa731
Author: Cloud User <ce...@ming01.novalocal>
Authored: Wed Jan 13 13:54:21 2016 +0000
Committer: Cloud User <ce...@ming01.novalocal>
Committed: Wed Jan 13 13:54:21 2016 +0000
----------------------------------------------------------------------
core/sql/common/csconvert.cpp | 12 +++---------
core/sql/exp/exp_conv.cpp | 17 ++++++-----------
core/sql/optimizer/SynthType.cpp | 10 ++++++++--
core/sql/sqlcomp/nadefaults.cpp | 2 +-
4 files changed, 18 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index a18b1c3..438ee8b 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1279,8 +1279,9 @@ char * findStartOfChar( char *someByteInChar, char *startOfBuffer )
rtnv-- ;
return rtnv ;
}
+
/* A method to do character set conversion , using Glibc iconv */
-int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen)
+static int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen)
{
iconv_t cd;
int rc;
@@ -1298,23 +1299,16 @@ int code_convert(const char *from_charset,const char *to_charset,char *inbuf, si
iconv_close(cd);
return outlen;
}
-/* from gbk to utf8 */
-int gbk2utf8(char *inbuf,size_t inlen,char *outbuf,size_t outlen)
-{
- return code_convert("gbk","utf-8",inbuf,inlen,outbuf,outlen);
-}
/* convert gbk string into UTF8 */
int gbkToUtf8(char* gbkString, size_t gbklen,
char* result ,size_t outlen, bool addNullAtEnd)
{
int originalOutlen = outlen;
- int finalLength = gbk2utf8 ( gbkString, gbklen, result, outlen);
+ int finalLength = code_convert( "gbk","utf-8", gbkString, gbklen, result, outlen);
if (finalLength == -1 ) return 0;
- //the result is allocated with lenght originalOutlen + 1
- //so no overrun is possible
if ( addNullAtEnd )
{
if(originalOutlen >= finalLength )
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/exp/exp_conv.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp
index 319727f..b2a11cd 100644
--- a/core/sql/exp/exp_conv.cpp
+++ b/core/sql/exp/exp_conv.cpp
@@ -9322,18 +9322,18 @@ convDoIt(char * source,
break;
// gb2312 -> utf8
+// JIRA 1720
case CONV_GBK_F_UTF8_V:
{
- char * targetbuf = new char[sourceLen*4+1];
- size_t sl = sourceLen;
- int convLen = gbkToUtf8( source, sl, targetbuf, sl*4);
int copyLen = 0;
+ int convLen = gbkToUtf8( source, sourceLen, target, targetLen);
if (convLen > 0) {
- copyLen = (convLen< targetLen) ? convLen: targetLen;
- str_cpy_all(target, targetbuf, copyLen);
+ copyLen = convLen;
//if the target length is not enough, instead of truncate, raise a SQL Error
if (convLen > targetLen)
- ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW);
+ ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW);
+ if ( varCharLen )
+ setVCLength(varCharLen, varCharLenSize, copyLen);
}
else {
// LCOV_EXCL_START
@@ -9341,11 +9341,6 @@ convDoIt(char * source,
copyLen = 0;
// LCOV_EXCL_STOP
}
-
- if ( varCharLen )
- setVCLength(varCharLen, varCharLenSize, copyLen);
- delete targetbuf;
-
};
break;
// 5/10/98: sjis -> unicode
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/optimizer/SynthType.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/SynthType.cpp b/core/sql/optimizer/SynthType.cpp
index 56e341f..7d1c8a9 100644
--- a/core/sql/optimizer/SynthType.cpp
+++ b/core/sql/optimizer/SynthType.cpp
@@ -5208,11 +5208,17 @@ const NAType *Translate::synthesizeType()
break;
case GBK_TO_UTF8:
- if (translateSource->getCharSet() == CharInfo::GBK || translateSource->getCharSet() == CharInfo::UnknownCharSet )
+ if (translateSource->getCharSet() == CharInfo::GBK )
charsetTarget = CharInfo::UTF8;
else
{
- if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) )
+ /* this is a solution to support GBK before Trafodion can support GBK in total
+ * see jira 1720 for more details
+ * the logic here is:
+ * when HIVE_FILE_CHARSET is not empty, it means the real charset in Hive table is not same as HIVE_DEFAULT_CHARSET
+ * in this case, allow the converting , ignoring the source charset checking above
+ */
+ if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "" ) //CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) )
err4106arg = SQLCHARSETCODE_GB2312;
else
charsetTarget = CharInfo::UTF8;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp
index cfe2cf3..20152d6 100644
--- a/core/sql/sqlcomp/nadefaults.cpp
+++ b/core/sql/sqlcomp/nadefaults.cpp
@@ -1958,7 +1958,7 @@ SDDkwd__(EXE_DIAGNOSTIC_EVENTS, "OFF"),
DDkwd__(HIVE_DEFAULT_CHARSET, (char *)SQLCHARSETSTRING_UTF8),
DD_____(HIVE_DEFAULT_SCHEMA, "HIVE"),
- DD_____(HIVE_FILE_CHARSET, (char *)SQLCHARSETSTRING_UTF8),
+ DD_____(HIVE_FILE_CHARSET, ""),
DD_____(HIVE_FILE_NAME, "/hive/tpcds/customer/customer.dat" ),
DD_____(HIVE_HDFS_STATS_LOG_FILE, ""),
DDint__(HIVE_LIB_HDFS_PORT_OVERRIDE, "-1"),
[2/9] incubator-trafodion git commit: fix the pull-request comments
for jira1720
Posted by hz...@apache.org.
fix the pull-request comments for jira1720
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/12a0c3e4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/12a0c3e4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/12a0c3e4
Branch: refs/heads/master
Commit: 12a0c3e4006d9c2b2cf9e62f23121b27ddee17c9
Parents: d51d201
Author: Cloud User <ce...@ming01.novalocal>
Authored: Tue Jan 12 14:43:03 2016 +0000
Committer: Cloud User <ce...@ming01.novalocal>
Committed: Tue Jan 12 14:43:03 2016 +0000
----------------------------------------------------------------------
core/sql/common/csconvert.cpp | 10 ++++++++--
core/sql/common/csconvert.h | 2 +-
core/sql/exp/exp_conv.cpp | 5 +++--
3 files changed, 12 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/12a0c3e4/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index b0d21af..f94ec2a 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1304,16 +1304,22 @@ int gbk2utf8(char *inbuf,size_t inlen,char *outbuf,size_t outlen)
return code_convert("gbk","utf-8",inbuf,inlen,outbuf,outlen);
}
+/* convert gbk string into UTF8 */
int gbkToUtf8(char* gbkString, size_t gbklen,
char* result ,size_t outlen, int addNullAtEnd)
{
-
+ int originalOutlen = outlen;
int finalLength = gbk2utf8 ( gbkString, gbklen, result, outlen);
if (finalLength == -1 ) return 0;
+ //the result is allocated with lenght originalOutlen + 1
+ //so no overrun is possible
if ( addNullAtEnd > 0 )
- result[finalLength] = 0;
+ {
+ if(originalOutlen >= finalLength )
+ result[finalLength] = 0;
+ }
return finalLength;
}
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/12a0c3e4/core/sql/common/csconvert.h
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.h b/core/sql/common/csconvert.h
index 57fec71..810fae5 100644
--- a/core/sql/common/csconvert.h
+++ b/core/sql/common/csconvert.h
@@ -108,7 +108,7 @@ int UTF16ToLocale( const enum cnv_version version,
NA_EIDPROC
int gbkToUtf8(char* gbkString, size_t gbklen,
- char* result ,size_t outlen, int addNullAtEnd=FALSE);
+ char* result ,size_t outlen, bool addNullAtEnd=FALSE);
/*
* LocaleCharToUCS4() converts the FIRST char in the input string to its
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/12a0c3e4/core/sql/exp/exp_conv.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp
index 015ec0f..319727f 100644
--- a/core/sql/exp/exp_conv.cpp
+++ b/core/sql/exp/exp_conv.cpp
@@ -9331,8 +9331,9 @@ convDoIt(char * source,
if (convLen > 0) {
copyLen = (convLen< targetLen) ? convLen: targetLen;
str_cpy_all(target, targetbuf, copyLen);
- // if (convLen > targetLen)
-
+ //if the target length is not enough, instead of truncate, raise a SQL Error
+ if (convLen > targetLen)
+ ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW);
}
else {
// LCOV_EXCL_START
[5/9] incubator-trafodion git commit: refine the iconv function,
remove unnecessary code to boost perf further
Posted by hz...@apache.org.
refine the iconv function, remove unnecessary code to boost perf further
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/aecc2db1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/aecc2db1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/aecc2db1
Branch: refs/heads/master
Commit: aecc2db1d76b1f030fc115060a2ad27de4802f65
Parents: 82b256c
Author: Liu Ming <mi...@esgyn.cn>
Authored: Wed Jan 13 14:09:26 2016 +0000
Committer: Liu Ming <mi...@esgyn.cn>
Committed: Wed Jan 13 14:09:26 2016 +0000
----------------------------------------------------------------------
core/sql/common/csconvert.cpp | 30 +++++++++++++++++-------------
1 file changed, 17 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/aecc2db1/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index 438ee8b..2423976 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1281,21 +1281,24 @@ char * findStartOfChar( char *someByteInChar, char *startOfBuffer )
}
/* A method to do character set conversion , using Glibc iconv */
-static int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen)
+static int charsetConvert(const char *srcCharset,const char *targetCharset,char *inputbuf, size_t inputlen, char *outbuf,size_t outlen)
{
+ char **ptrin = &inputbuf;
+ char **ptrout = &outbuf;
+
iconv_t cd;
- int rc;
- char **pin = &inbuf;
- char **pout = &outbuf;
-
- cd = iconv_open(to_charset,from_charset);
- if (cd==0) return -1;
- memset(outbuf,0,outlen);
- if (iconv(cd,pin,(size_t*)&inlen,pout,(size_t *)&outlen)==-1)
+ cd = iconv_open(targetCharset,srcCharset);
+
+ if (cd==0)
+ return -1;
+
+ if (iconv(cd,ptrin,(size_t*)&inputlen,ptrout,(size_t *)&outlen) == -1)
{
+ //error occurs
iconv_close(cd);
return -1;
}
+
iconv_close(cd);
return outlen;
}
@@ -1305,14 +1308,15 @@ int gbkToUtf8(char* gbkString, size_t gbklen,
char* result ,size_t outlen, bool addNullAtEnd)
{
int originalOutlen = outlen;
- int finalLength = code_convert( "gbk","utf-8", gbkString, gbklen, result, outlen);
+ int finalLength = charsetConvert( "gbk","utf-8", gbkString, gbklen, result, outlen);
- if (finalLength == -1 ) return 0;
+ if (finalLength == -1 )
+ return 0;
if ( addNullAtEnd )
{
- if(originalOutlen >= finalLength )
- result[finalLength] = 0;
+ if(originalOutlen >= finalLength )
+ result[finalLength] = 0;
}
return finalLength;
[3/9] incubator-trafodion git commit: fix comments for jira 1720
Posted by hz...@apache.org.
fix comments for jira 1720
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/3efa7313
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/3efa7313
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/3efa7313
Branch: refs/heads/master
Commit: 3efa7313c51b31b4d1a3be28a84a083a63dd0178
Parents: 12a0c3e
Author: Cloud User <ce...@ming01.novalocal>
Authored: Tue Jan 12 15:56:46 2016 +0000
Committer: Cloud User <ce...@ming01.novalocal>
Committed: Tue Jan 12 15:56:46 2016 +0000
----------------------------------------------------------------------
core/sql/common/csconvert.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/3efa7313/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index f94ec2a..a18b1c3 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1306,7 +1306,7 @@ int gbk2utf8(char *inbuf,size_t inlen,char *outbuf,size_t outlen)
/* convert gbk string into UTF8 */
int gbkToUtf8(char* gbkString, size_t gbklen,
- char* result ,size_t outlen, int addNullAtEnd)
+ char* result ,size_t outlen, bool addNullAtEnd)
{
int originalOutlen = outlen;
int finalLength = gbk2utf8 ( gbkString, gbklen, result, outlen);
@@ -1315,7 +1315,7 @@ int gbkToUtf8(char* gbkString, size_t gbklen,
//the result is allocated with lenght originalOutlen + 1
//so no overrun is possible
- if ( addNullAtEnd > 0 )
+ if ( addNullAtEnd )
{
if(originalOutlen >= finalLength )
result[finalLength] = 0;
[9/9] incubator-trafodion git commit: [TRAFODION-1720] Add support to
convert gbk into utf8
Posted by hz...@apache.org.
[TRAFODION-1720] Add support to convert gbk into utf8
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/9cc1e835
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/9cc1e835
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/9cc1e835
Branch: refs/heads/master
Commit: 9cc1e835fde12af5c0ce0d74395876ab48a071cc
Parents: aa3deff e6e88d0
Author: Hans Zeller <hz...@apache.org>
Authored: Mon Feb 1 21:47:43 2016 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Mon Feb 1 21:47:43 2016 +0000
----------------------------------------------------------------------
core/sql/common/csconvert.cpp | 48 ++++++++++++++++++++++++++
core/sql/common/csconvert.h | 4 +++
core/sql/exp/exp_clause_derived.h | 4 ++-
core/sql/exp/exp_conv.cpp | 26 ++++++++++++++
core/sql/generator/GenItemFunc.cpp | 3 ++
core/sql/generator/GenRelScan.cpp | 22 +++++++++---
core/sql/optimizer/BindItemExpr.cpp | 36 +++++++++++++++----
core/sql/optimizer/ItemExpr.cpp | 2 ++
core/sql/optimizer/ItemFunc.h | 1 +
core/sql/optimizer/SynthType.cpp | 7 ++++
core/sql/regress/hive/EXPECTED005 | 26 ++++++++++++++
core/sql/regress/hive/TEST005 | 7 ++++
core/sql/regress/hive/TEST005_a.hive.sql | 9 +++++
core/sql/regress/hive/tbl_gbk.data | Bin 0 -> 129 bytes
core/sql/sqlcomp/DefaultConstants.h | 3 ++
core/sql/sqlcomp/nadefaults.cpp | 2 ++
16 files changed, 188 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9cc1e835/core/sql/generator/GenRelScan.cpp
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9cc1e835/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9cc1e835/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
[8/9] incubator-trafodion git commit: change more for jira 1720
comments
Posted by hz...@apache.org.
change more for jira 1720 comments
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/e6e88d06
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/e6e88d06
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/e6e88d06
Branch: refs/heads/master
Commit: e6e88d0628979982f02d54c094156cf34f19c7a1
Parents: 63ef4bd
Author: Liu Ming <mi...@esgyn.cn>
Authored: Fri Jan 29 14:36:11 2016 +0000
Committer: Liu Ming <mi...@esgyn.cn>
Committed: Fri Jan 29 14:36:11 2016 +0000
----------------------------------------------------------------------
core/sql/common/csconvert.cpp | 4 +++-
core/sql/exp/exp_conv.cpp | 2 +-
core/sql/generator/GenRelScan.cpp | 6 +-----
3 files changed, 5 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/e6e88d06/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index fc4263d..b4bdaac 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1315,8 +1315,10 @@ int gbkToUtf8(char* gbkString, size_t gbklen,
if ( addNullAtEnd )
{
- if(originalOutlen >= finalLength )
+ if(originalOutlen > finalLength )
result[finalLength] = 0;
+ else
+ return -1;
}
return finalLength;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/e6e88d06/core/sql/exp/exp_conv.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp
index 36f9a30..3a488fe 100644
--- a/core/sql/exp/exp_conv.cpp
+++ b/core/sql/exp/exp_conv.cpp
@@ -9327,7 +9327,7 @@ convDoIt(char * source,
{
int copyLen = 0;
int convLen = gbkToUtf8( source, sourceLen, target, targetLen);
- if (convLen > 0) {
+ if (convLen >= 0) {
copyLen = convLen;
if ( varCharLen )
setVCLength(varCharLen, varCharLenSize, copyLen);
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/e6e88d06/core/sql/generator/GenRelScan.cpp
----------------------------------------------------------------------
diff --git a/core/sql/generator/GenRelScan.cpp b/core/sql/generator/GenRelScan.cpp
index 7729b13..eaf46f6 100644
--- a/core/sql/generator/GenRelScan.cpp
+++ b/core/sql/generator/GenRelScan.cpp
@@ -216,7 +216,7 @@ int HbaseAccess::createAsciiColAndCastExpr(Generator * generator,
}
if (newGivenType->getTypeQualifier() == NA_CHARACTER_TYPE &&
- CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "GBK")
+ (CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "GBK" || CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "gbk") && CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) == "UTF8" )
needTranslate = TRUE;
// source ascii row is a varchar where the data is a pointer to the source data
@@ -798,7 +798,6 @@ short FileScan::codeGenForHive(Generator * generator)
const Int32 executorPredTuppIndex = 3;
const Int32 asciiTuppIndex = 4;
ULng32 asciiRowLen;
- ULng32 translateRowLen;
ExpTupleDesc * asciiTupleDesc = 0;
ex_cri_desc * work_cri_desc = NULL;
@@ -808,7 +807,6 @@ short FileScan::codeGenForHive(Generator * generator)
ExpTupleDesc::TupleDataFormat asciiRowFormat = ExpTupleDesc::SQLARK_EXPLODED_FORMAT;
ExpTupleDesc::TupleDataFormat hdfsRowFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT;
ValueIdList asciiVids;
- ValueIdList transVids;
ValueIdList executorPredCastVids;
ValueIdList projectExprOnlyCastVids;
@@ -1515,7 +1513,6 @@ short HbaseAccess::genRowIdExpr(Generator * generator,
int res;
ItemExpr * castVal = NULL;
ItemExpr * asciiVal = NULL;
- ItemExpr * transVal = NULL;
res = createAsciiColAndCastExpr(generator,
givenType,
asciiVal, castVal);
@@ -1611,7 +1608,6 @@ short HbaseAccess::genRowIdExprForNonSQ(Generator * generator,
int res;
ItemExpr * castVal = NULL;
ItemExpr * asciiVal = NULL;
- ItemExpr * transVal = NULL;
res = createAsciiColAndCastExpr(generator,
givenType,
asciiVal, castVal);
[7/9] incubator-trafodion git commit: fix some comments
Posted by hz...@apache.org.
fix some comments
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/63ef4bdf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/63ef4bdf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/63ef4bdf
Branch: refs/heads/master
Commit: 63ef4bdfc880a5e295747b6cdd06b556807a642c
Parents: 6abfa2a
Author: Liu Ming <mi...@esgyn.cn>
Authored: Thu Jan 28 09:49:03 2016 +0000
Committer: Liu Ming <mi...@esgyn.cn>
Committed: Thu Jan 28 09:49:03 2016 +0000
----------------------------------------------------------------------
core/sql/optimizer/SynthType.cpp | 15 +--------------
1 file changed, 1 insertion(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/63ef4bdf/core/sql/optimizer/SynthType.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/SynthType.cpp b/core/sql/optimizer/SynthType.cpp
index 5736422..fc0f12d 100644
--- a/core/sql/optimizer/SynthType.cpp
+++ b/core/sql/optimizer/SynthType.cpp
@@ -5211,20 +5211,7 @@ const NAType *Translate::synthesizeType()
if (translateSource->getCharSet() == CharInfo::GBK )
charsetTarget = CharInfo::UTF8;
else
- {
- /* this is a solution to support GBK before Trafodion can support GBK in total
- * see jira 1720 for more details
- * the logic here is:
- * when HIVE_FILE_CHARSET is not empty, it means the real charset in Hive table is not same as HIVE_DEFAULT_CHARSET
- * in this case, allow the converting , ignoring the source charset checking above
- if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "" )
- err4106arg = SQLCHARSETCODE_GBK;
- else
- charsetTarget = CharInfo::UTF8;
- */
- err4106arg = SQLCHARSETSTRING_GBK;
-
- }
+ err4106arg = SQLCHARSETSTRING_GBK;
break;
case ISO88591_TO_UTF8:
[6/9] incubator-trafodion git commit: enhance of fixes to jira 1720
Posted by hz...@apache.org.
enhance of fixes to jira 1720
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/6abfa2a6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/6abfa2a6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/6abfa2a6
Branch: refs/heads/master
Commit: 6abfa2a6522376db7d3d5abbc15efdc01334b97d
Parents: aecc2db
Author: Liu Ming <mi...@esgyn.cn>
Authored: Thu Jan 28 09:41:16 2016 +0000
Committer: Liu Ming <mi...@esgyn.cn>
Committed: Thu Jan 28 09:41:16 2016 +0000
----------------------------------------------------------------------
core/sql/common/csconvert.cpp | 2 +-
core/sql/exp/exp_conv.cpp | 8 ++++--
core/sql/generator/GenRelScan.cpp | 26 +++++++++++++++----
core/sql/optimizer/BindItemExpr.cpp | 36 +++++++++++++++++++++-----
core/sql/optimizer/SynthType.cpp | 8 +++---
core/sql/regress/hive/EXPECTED005 | 26 +++++++++++++++++++
core/sql/regress/hive/TEST005 | 7 +++++
core/sql/regress/hive/TEST005_a.hive.sql | 9 +++++++
core/sql/regress/hive/tbl_gbk.data | Bin 0 -> 129 bytes
9 files changed, 105 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index 2423976..fc4263d 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -1311,7 +1311,7 @@ int gbkToUtf8(char* gbkString, size_t gbklen,
int finalLength = charsetConvert( "gbk","utf-8", gbkString, gbklen, result, outlen);
if (finalLength == -1 )
- return 0;
+ return -1;
if ( addNullAtEnd )
{
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/exp/exp_conv.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp
index b2a11cd..36f9a30 100644
--- a/core/sql/exp/exp_conv.cpp
+++ b/core/sql/exp/exp_conv.cpp
@@ -9329,16 +9329,20 @@ convDoIt(char * source,
int convLen = gbkToUtf8( source, sourceLen, target, targetLen);
if (convLen > 0) {
copyLen = convLen;
+ if ( varCharLen )
+ setVCLength(varCharLen, varCharLenSize, copyLen);
//if the target length is not enough, instead of truncate, raise a SQL Error
if (convLen > targetLen)
ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW);
- if ( varCharLen )
- setVCLength(varCharLen, varCharLenSize, copyLen);
}
else {
// LCOV_EXCL_START
convLen = 0;
copyLen = 0;
+ if ( varCharLen )
+ setVCLength(varCharLen, varCharLenSize, copyLen);
+ ExRaiseSqlError(heap, diagsArea, EXE_CONVERT_STRING_ERROR);
+ return ex_expr::EXPR_ERROR;
// LCOV_EXCL_STOP
}
};
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/generator/GenRelScan.cpp
----------------------------------------------------------------------
diff --git a/core/sql/generator/GenRelScan.cpp b/core/sql/generator/GenRelScan.cpp
index 573873f..7729b13 100644
--- a/core/sql/generator/GenRelScan.cpp
+++ b/core/sql/generator/GenRelScan.cpp
@@ -202,6 +202,7 @@ int HbaseAccess::createAsciiColAndCastExpr(Generator * generator,
asciiValue = NULL;
castValue = NULL;
CollHeap * h = generator->wHeap();
+ bool needTranslate = FALSE;
// if this is an upshifted datatype, remove the upshift attr.
// We dont want to upshift data during retrievals or while building keys.
@@ -214,22 +215,32 @@ int HbaseAccess::createAsciiColAndCastExpr(Generator * generator,
((CharType*)newGivenType)->setUpshifted(FALSE);
}
+ if (newGivenType->getTypeQualifier() == NA_CHARACTER_TYPE &&
+ CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "GBK")
+ needTranslate = TRUE;
+
// source ascii row is a varchar where the data is a pointer to the source data
// in the hdfs buffer.
NAType *asciiType = NULL;
if (DFS2REC::isDoubleCharacter(newGivenType->getFSDatatype()))
- asciiType = new (h) SQLVarChar(sizeof(Int64)/2, newGivenType->supportsSQLnull(),
+ {
+ asciiType = new (h) SQLVarChar(sizeof(Int64)/2, newGivenType->supportsSQLnull(),
FALSE, FALSE, newGivenType->getCharSet());
+ }
+ // set the source charset to GBK if HIVE_FILE_CHARSET is set
+ // HIVE_FILE_CHARSET can only be empty or GBK
+ else if ( needTranslate == TRUE )
+ {
+ asciiType = new (h) SQLVarChar(sizeof(Int64)/2, newGivenType->supportsSQLnull(),
+ FALSE, FALSE, CharInfo::GBK);
+ }
else
asciiType = new (h) SQLVarChar(sizeof(Int64), newGivenType->supportsSQLnull());
-
if (asciiType)
{
asciiValue = new (h) NATypeToItem(asciiType->newCopy(h));
-
- castValue = new(h) Cast(asciiValue, newGivenType);
-
+ castValue = new(h) Cast(asciiValue, newGivenType);
if (castValue)
{
((Cast*)castValue)->setSrcIsVarcharPtr(TRUE);
@@ -787,6 +798,7 @@ short FileScan::codeGenForHive(Generator * generator)
const Int32 executorPredTuppIndex = 3;
const Int32 asciiTuppIndex = 4;
ULng32 asciiRowLen;
+ ULng32 translateRowLen;
ExpTupleDesc * asciiTupleDesc = 0;
ex_cri_desc * work_cri_desc = NULL;
@@ -796,6 +808,7 @@ short FileScan::codeGenForHive(Generator * generator)
ExpTupleDesc::TupleDataFormat asciiRowFormat = ExpTupleDesc::SQLARK_EXPLODED_FORMAT;
ExpTupleDesc::TupleDataFormat hdfsRowFormat = ExpTupleDesc::SQLMX_ALIGNED_FORMAT;
ValueIdList asciiVids;
+ ValueIdList transVids;
ValueIdList executorPredCastVids;
ValueIdList projectExprOnlyCastVids;
@@ -840,6 +853,7 @@ short FileScan::codeGenForHive(Generator * generator)
asciiVids.insert(asciiValue->getValueId());
castValue->bindNode(generator->getBindWA());
+
if (convertSkipList[ii] == 1 || convertSkipList[ii] == 2)
executorPredCastVids.insert(castValue->getValueId());
else
@@ -1501,6 +1515,7 @@ short HbaseAccess::genRowIdExpr(Generator * generator,
int res;
ItemExpr * castVal = NULL;
ItemExpr * asciiVal = NULL;
+ ItemExpr * transVal = NULL;
res = createAsciiColAndCastExpr(generator,
givenType,
asciiVal, castVal);
@@ -1596,6 +1611,7 @@ short HbaseAccess::genRowIdExprForNonSQ(Generator * generator,
int res;
ItemExpr * castVal = NULL;
ItemExpr * asciiVal = NULL;
+ ItemExpr * transVal = NULL;
res = createAsciiColAndCastExpr(generator,
givenType,
asciiVal, castVal);
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/optimizer/BindItemExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/BindItemExpr.cpp b/core/sql/optimizer/BindItemExpr.cpp
index da39397..3fd854e 100644
--- a/core/sql/optimizer/BindItemExpr.cpp
+++ b/core/sql/optimizer/BindItemExpr.cpp
@@ -1497,10 +1497,10 @@ ItemExpr* Assign::tryToRelaxCharTypeMatchRules(BindWA *bindWA)
ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
{
ItemExpr *result = this;
- enum {iUCS2 = 0, iISO = 1, iUTF8 = 2, iSJIS = 3, iUNK = 4};
- Int32 Literals_involved[5] = { 0, 0, 0, 0, 0 };
- Int32 nonLiterals_involved[5] = { 0, 0, 0, 0, 0 };
- Int32 charsets_involved[5] = { 0, 0, 0, 0, 0 };
+ enum {iUCS2 = 0, iISO = 1, iUTF8 = 2, iSJIS = 3, iGBK = 4, iUNK = 5};
+ Int32 Literals_involved[6] = { 0, 0, 0, 0, 0, 0};
+ Int32 nonLiterals_involved[6] = { 0, 0, 0, 0, 0, 0 };
+ Int32 charsets_involved[6] = { 0, 0, 0, 0, 0, 0 };
Int32 charsetsCount = 0;
CharInfo::CharSet cs = CharInfo::UnknownCharSet;
CharInfo::CharSet curr_chld_cs= CharInfo::UnknownCharSet;
@@ -1545,6 +1545,10 @@ ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
cur_chld_cs_ndx = iSJIS;
break;
+ case CharInfo::GBK:
+ cur_chld_cs_ndx = iGBK;
+ break;
+
//case CharInfo::KANJI_MP:
//case CharInfo::KSC5601_MP:
default:
@@ -1593,6 +1597,8 @@ ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
cs = CharInfo::UTF8;
else if ( Literals_involved[iSJIS] > 0 )
cs = CharInfo::SJIS;
+ else if ( Literals_involved[iGBK] > 0 )
+ cs = CharInfo::GBK;
//
// Now, we may be able to optimize by translating the 1st child
@@ -1601,7 +1607,7 @@ ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
//
if ( ( cs == chld0_cs ) && ( arity == 2 ) &&
( curr_chld_opType != ITM_TRANSLATE ) &&
- ( charsetsCount == (charsets_involved[iUCS2] + charsets_involved[iUTF8]) ) )
+ ( charsetsCount == (charsets_involved[iUCS2] + charsets_involved[iUTF8] + charsets_involved[iGBK]) ) )
{
if ( chld0_opType == ITM_TRANSLATE )
cs = curr_chld_cs; //...because we will eliminate a translate op
@@ -1631,7 +1637,22 @@ ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
if ( desiredType->getTypeQualifier() == NA_CHARACTER_TYPE )
{
CharInfo::CharSet Desired_cs = ((const CharType*)desiredType)->getCharSet();
- if ( (chld_cs != Desired_cs) && ( ! ((Cast *)this)->tgtCharSetSpecified() ) )
+ /*
+ * Special handling for JIRA 1720, used only in the bulk load scenario.
+ * When the user sets HIVE_FILE_CHARSET to 'GBK', it means the data stored in the Hive
+ * table is encoded as GBK. Trafodion treats all Hive data as 'UTF8' by default, so
+ * this check allows the automatic charset conversion to happen during bulk load.
+ * The reason is:
+ * the Hive scan marks the source column as GBK when HIVE_FILE_CHARSET is set to GBK,
+ * which is the only value it can be set to.
+ * The binder then invokes this implicit casting method to check whether an automatic
+ * charset conversion is needed.
+ * The Hive scan does not set the tgtCharSetSpecified field, so in order to
+ * force a translate to be performed, an extra check is added here.
+ */
+ if( (chld_cs != Desired_cs) && CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "GBK" )
+ result = performImplicitCasting( Desired_cs, bindWA );
+ else if ( (chld_cs != Desired_cs) && ( ! ((Cast *)this)->tgtCharSetSpecified() ) )
{
//
// Looks like user said CAST( ... as [var]char(NNN) )
@@ -1696,6 +1717,9 @@ ItemExpr* ItemExpr::tryToDoImplicitCasting(BindWA *bindWA)
case Translate::UCS2_TO_UTF8:
Required_cs = CharInfo::UNICODE;
break;
+ case Translate::GBK_TO_UTF8:
+ Required_cs = CharInfo::GBK;
+ break;
default:
break;
}
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/optimizer/SynthType.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/SynthType.cpp b/core/sql/optimizer/SynthType.cpp
index 7d1c8a9..5736422 100644
--- a/core/sql/optimizer/SynthType.cpp
+++ b/core/sql/optimizer/SynthType.cpp
@@ -5217,11 +5217,13 @@ const NAType *Translate::synthesizeType()
* the logic here is:
* when HIVE_FILE_CHARSET is not empty, it means the real charset in Hive table is not same as HIVE_DEFAULT_CHARSET
* in this case, allow the converting , ignoring the source charset checking above
- */
- if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "" ) //CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) )
- err4106arg = SQLCHARSETCODE_GB2312;
+ if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "" )
+ err4106arg = SQLCHARSETCODE_GBK;
else
charsetTarget = CharInfo::UTF8;
+ */
+ err4106arg = SQLCHARSETSTRING_GBK;
+
}
break;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/regress/hive/EXPECTED005
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/EXPECTED005 b/core/sql/regress/hive/EXPECTED005
index 49d8a62..9299d25 100644
--- a/core/sql/regress/hive/EXPECTED005
+++ b/core/sql/regress/hive/EXPECTED005
@@ -447,4 +447,30 @@ TINT SM I BIG STR F
--- 1 row(s) selected.
>>
+>>cqd HIVE_FILE_CHARSET 'GBK';
+
+--- SQL operation complete.
+>>select c1, CONVERTTOHEX(c2) from tbl_gbk;
+
+C1 (EXPR)
+----------- --------------------------------------------------
+
+ 3 EC8B90EC978E
+ 5 EC8B90EC978E
+ 2 EC8B90EC978E
+ 4 EC8B90EC978E
+ 6 EC8B90EC978E
+ 7 EC8B90EC978E
+ 8 EC8B90EC978E
+ 3 ECBB93EB9F8FECAB97EB9B91
+ 2 ECBB93EB9F8FECAB97EB9B91
+ 6 ECBB93EB9F8FECAB97EB9B91
+ 19 ECBB93EB9F8FECAB97EB9B91
+ 8 ECBB93EB9F8FECAB97EB9B91
+
+--- 12 row(s) selected.
+>>cqd HIVE_FILE_CHARSET reset;
+
+--- SQL operation complete.
+>>
>>log;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/regress/hive/TEST005
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/TEST005 b/core/sql/regress/hive/TEST005
index b8f7518..5bc2a21 100644
--- a/core/sql/regress/hive/TEST005
+++ b/core/sql/regress/hive/TEST005
@@ -30,16 +30,19 @@ sh regrhadoop.ksh fs -mkdir /user/hive/exttables/customer_ddl;
sh regrhadoop.ksh fs -mkdir /user/hive/exttables/customer_temp;
sh regrhadoop.ksh fs -mkdir /user/hive/exttables/tbl_utf8;
sh regrhadoop.ksh fs -mkdir /user/hive/exttables/tbl_type;
+sh regrhadoop.ksh fs -mkdir /user/hive/exttables/tbl_gbk;
--empty folders
sh regrhadoop.ksh fs -rm /user/hive/exttables/customer_ddl/*;
sh regrhadoop.ksh fs -rm /user/hive/exttables/customer_temp/*;
sh regrhadoop.ksh fs -rm /user/hive/exttables/tbl_utf8/*;
sh regrhadoop.ksh fs -rm /user/hive/exttables/tbl_type/*;
+sh regrhadoop.ksh fs -rm /user/hive/exttables/tbl_gbk/*;
--- setup Hive tables
sh regrhive.ksh -v -f $REGRTSTDIR/TEST005_a.hive.sql;
sh regrhadoop.ksh fs -put $REGRTSTDIR/tbl_utf8.data /user/hive/exttables/tbl_utf8;
sh regrhadoop.ksh fs -put $REGRTSTDIR/tbl_type.data /user/hive/exttables/tbl_type;
+sh regrhadoop.ksh fs -put $REGRTSTDIR/tbl_gbk.data /user/hive/exttables/tbl_gbk;
log LOG005 clear;
@@ -219,4 +222,8 @@ select * from tbl_type;
insert into tbl_type_temp select * from tbl_type;
select * from tbl_type_temp;
+cqd HIVE_FILE_CHARSET 'GBK';
+select c1, CONVERTTOHEX(c2) from tbl_gbk;
+cqd HIVE_FILE_CHARSET reset;
+
log;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/regress/hive/TEST005_a.hive.sql
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/TEST005_a.hive.sql b/core/sql/regress/hive/TEST005_a.hive.sql
index ab4098f..1b5c580 100644
--- a/core/sql/regress/hive/TEST005_a.hive.sql
+++ b/core/sql/regress/hive/TEST005_a.hive.sql
@@ -154,3 +154,12 @@ create table tbl_type_temp
t timestamp
)
row format delimited fields terminated by '|';
+
+drop table tbl_gbk;
+create external table tbl_gbk
+(
+ c1 int,
+ c2 string
+)
+row format delimited fields terminated by '\t'
+location '/user/hive/exttables/tbl_gbk';
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/6abfa2a6/core/sql/regress/hive/tbl_gbk.data
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/tbl_gbk.data b/core/sql/regress/hive/tbl_gbk.data
new file mode 100644
index 0000000..2fa331b
Binary files /dev/null and b/core/sql/regress/hive/tbl_gbk.data differ