You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by hz...@apache.org on 2017/03/02 23:41:56 UTC

[1/2] incubator-trafodion git commit: [TRAFODION-2477] Invalid characters in translation are ignored

Repository: incubator-trafodion
Updated Branches:
  refs/heads/master 8c5ade722 -> b19286ba7


[TRAFODION-2477] Invalid characters in translation are ignored

Right now we ignore such invalid characters and may also truncate
the string at the point of the invalid character. The expected
behavior is to raise an error.

The only type of invalid data I could create with regular SQL syntax
is an invalid UTF-16 surrogate pair. We have no checks that detect
those when the data is entered. Invalid UTF-8, on the other hand, is
rejected when we try to insert it into the database (at least in the
case I tried).

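The fix adds a check to generate an error (file conversionLocale.cpp).
It also adds two CQDs, TRANSLATE_ERROR and
TRANSLATE_ERROR_UNICODE_TO_UNICODE (remaining code files). Setting
either one to 'OFF' suppresses the error and replaces the invalid
character with a replacement character instead; the second CQD applies
only to translations between Unicode character sets (UCS2 and UTF8).
Right now we use "?" as the replacement, even for Unicode, which has
its own special replacement character; see TRAFODION-2515.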


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/079b2107
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/079b2107
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/079b2107

Branch: refs/heads/master
Commit: 079b2107bc6c4475192d21deba9cac3f1f6687dd
Parents: 3d54fdd
Author: Hans Zeller <hz...@apache.org>
Authored: Wed Mar 1 22:33:44 2017 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Wed Mar 1 22:33:44 2017 +0000

----------------------------------------------------------------------
 core/sql/common/conversionLocale.cpp  |  4 +-
 core/sql/exp/exp_function.cpp         | 10 ++++-
 core/sql/exp/exp_function.h           | 20 +++++++--
 core/sql/generator/GenItemFunc.cpp    | 14 +++++-
 core/sql/regress/charsets/EXPECTED314 | 68 ++++++++++++++++++++++++++++++
 core/sql/regress/charsets/TEST314     | 12 ++++++
 core/sql/sqlcomp/DefaultConstants.h   |  3 ++
 core/sql/sqlcomp/nadefaults.cpp       |  2 +
 8 files changed, 125 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/079b2107/core/sql/common/conversionLocale.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/conversionLocale.cpp b/core/sql/common/conversionLocale.cpp
index 420b9c7..a225308 100644
--- a/core/sql/common/conversionLocale.cpp
+++ b/core/sql/common/conversionLocale.cpp
@@ -88,8 +88,8 @@ charBuf* unicodeToUtf8(const NAWcharBuf& unicodeString, CollHeap *heap,
                       , addNullAtEnd
                       , allowInvalidCodePoint 
                       );
-   if ( res == NULL ) // translation failed
-     return res;
+   if ( res == NULL || errorcode != 0) // translation failed
+     return NULL;
 
    charBuf* output = checkSpace(heap, res->getStrLen(), utf8String, addNullAtEnd);
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/079b2107/core/sql/exp/exp_function.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_function.cpp b/core/sql/exp/exp_function.cpp
index 7a19492..437be28 100644
--- a/core/sql/exp/exp_function.cpp
+++ b/core/sql/exp/exp_function.cpp
@@ -393,10 +393,12 @@ ex_function_substring::ex_function_substring(OperatorTypeEnum oper_type,
 ex_function_translate::ex_function_translate(OperatorTypeEnum oper_type,
                                    Attributes ** attr,
                                    Space * space,
-                                   Int32 conv_type)
+                                   Int32 conv_type,
+                                   Int16 flags)
 : ex_function_clause(oper_type, 2 , attr, space)
 {
   conv_type_= conv_type;
+  flags_ = flags;
 };
 
 ex_function_trim::ex_function_trim(OperatorTypeEnum oper_type,
@@ -5812,6 +5814,8 @@ ex_expr::exp_return_type ex_function_translate::eval(char *op_data[],
 
   Attributes * op0 = getOperand(0);
   Attributes * op1 = getOperand(1);
+  ULng32 convFlags = (flags_ & TRANSLATE_FLAG_ALLOW_INVALID_CODEPOINT ?
+                      CONV_ALLOW_INVALID_CODE_VALUE : 0);
 
 #pragma nowarn(1506)   // warning elimination 
       return convDoIt(op_data[1],
@@ -5828,7 +5832,9 @@ ex_expr::exp_return_type ex_function_translate::eval(char *op_data[],
         op0->getVCIndicatorLength(),
         heap,
         diagsArea,
-        (ConvInstruction)convType);
+        (ConvInstruction)convType,
+        NULL,
+        convFlags);
 #pragma warn(1506)  // warning elimination 
 }
   

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/079b2107/core/sql/exp/exp_function.h
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_function.h b/core/sql/exp/exp_function.h
index 17129cf..ad5ff01 100644
--- a/core/sql/exp/exp_function.h
+++ b/core/sql/exp/exp_function.h
@@ -1345,8 +1345,10 @@ class SQLEXP_LIB_FUNC  ex_function_translate : public ex_function_clause {
 
 public:
   NA_EIDPROC ex_function_translate (OperatorTypeEnum oper_type,
-                               Attributes ** attr,
-                               Space * space, Int32 conv_type);
+                                    Attributes ** attr,
+                                    Space * space,
+                                    Int32 conv_type,
+                                    Int16 flags);
   NA_EIDPROC ex_function_translate () {};
 
 
@@ -1371,16 +1373,28 @@ public:
   }
 
   NA_EIDPROC virtual short getClassSize() { return (short)sizeof(*this); }
+
+  // flags:
+  // 0x0001 set the CONV_ALLOW_INVALID_CODE_VALUE flag when converting
+  //        the data to allow invalid code points and replace them
+  //        with a replacement character
+  enum TranslateFlags
+  {
+    TRANSLATE_FLAG_ALLOW_INVALID_CODEPOINT = 0x001
+  };
+
   // ---------------------------------------------------------------------
 
 private:
   Int32            conv_type_;           // 00-03
+  // flags, see TranslateFlags enum above
+  Int16            flags_;               // 04-05
   // ---------------------------------------------------------------------
   // Fillers for potential future extensions without changing class size.
   // When a new member is added, size of this filler should be reduced so
   // that the size of the object remains the same (and is modulo 8).
   // ---------------------------------------------------------------------
-  char             fillers_[4];          // 04-07
+  char             fillers_[2];          // 06-07
 
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/079b2107/core/sql/generator/GenItemFunc.cpp
----------------------------------------------------------------------
diff --git a/core/sql/generator/GenItemFunc.cpp b/core/sql/generator/GenItemFunc.cpp
index 41875ae..6e98065 100644
--- a/core/sql/generator/GenItemFunc.cpp
+++ b/core/sql/generator/GenItemFunc.cpp
@@ -1873,6 +1873,8 @@ short NoOp::codeGen(Generator * generator)
 short Translate::codeGen(Generator * generator)
 {
   Attributes ** attr;
+  NABoolean unicodeToUnicode = FALSE;
+  Int16 translateFlags = 0;
   
   if (generator->getExpGenerator()->genItemExpr(this, &attr, (1 + getArity()), -1) == 1)
     return 0;
@@ -1893,12 +1895,14 @@ short Translate::codeGen(Generator * generator)
 	break;
      case UTF8_TO_UCS2:
 	convType = CONV_UTF8_F_UCS2_V;
+        unicodeToUnicode = TRUE;
 	break;
      case UCS2_TO_SJIS:
 	convType = CONV_UCS2_F_SJIS_V;
 	break;
      case UCS2_TO_UTF8:
 	convType = CONV_UCS2_F_UTF8_V;
+        unicodeToUnicode = TRUE;
 	break;
      case GBK_TO_UTF8:
         convType = CONV_GBK_F_UTF8_V;
@@ -1911,11 +1915,19 @@ short Translate::codeGen(Generator * generator)
 	convType = CONV_ASCII_F_V;
 	break;
   }
+
+  if (CmpCommon::getDefault(TRANSLATE_ERROR) == DF_OFF ||
+      (unicodeToUnicode &&
+       CmpCommon::getDefault(TRANSLATE_ERROR_UNICODE_TO_UNICODE) == DF_OFF))
+    translateFlags |= ex_function_translate::TRANSLATE_FLAG_ALLOW_INVALID_CODEPOINT;
+
   ex_clause * function_clause = 
 	new(generator->getSpace()) ex_function_translate(
 			         getOperatorType(),
 				 attr, 
-				 generator->getSpace(), convType
+				 generator->getSpace(),
+                                 convType,
+                                 translateFlags
 				);
 
   generator->getExpGenerator()->linkClause(this, function_clause);

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/079b2107/core/sql/regress/charsets/EXPECTED314
----------------------------------------------------------------------
diff --git a/core/sql/regress/charsets/EXPECTED314 b/core/sql/regress/charsets/EXPECTED314
index 9aa60fd..63a0ccf 100644
--- a/core/sql/regress/charsets/EXPECTED314
+++ b/core/sql/regress/charsets/EXPECTED314
@@ -465,6 +465,74 @@ C5BF
 5349                                
 
 --- 1 row(s) selected.
+>>-- Invalid UTF-16 surrogate pairs
+>>select converttohex(TRANSLATE(_ucs2 X'D8340041' using UCS2toUTF8)) from (values(0))x;
+
+*** ERROR[8690] An invalid character value encountered in TRANSLATE function. Source charset UNICODE , Target charset UTF8. Error data in HEX: 34ffffffd84100 .
+
+--- 0 row(s) selected.
+>>select converttohex(TRANSLATE(_ucs2 X'DD1E0041' using UCS2toUTF8)) from (values(0))x;
+
+*** ERROR[8690] An invalid character value encountered in TRANSLATE function. Source charset UNICODE , Target charset UTF8. Error data in HEX: 1effffffdd4100 .
+
+--- 0 row(s) selected.
+>>cqd TRANSLATE_ERROR_UNICODE_TO_UNICODE 'off';
+
+--- SQL operation complete.
+>>select converttohex(TRANSLATE(_ucs2 X'D8340041' using UCS2toUTF8)) from (values(0))x;
+
+(EXPR)      
+------------
+
+3F41        
+
+--- 1 row(s) selected.
+>>select converttohex(TRANSLATE(_ucs2 X'DD1E0041' using UCS2toUTF8)) from (values(0))x;
+
+(EXPR)      
+------------
+
+3F41        
+
+--- 1 row(s) selected.
+>>select converttohex(cast(cast(_ucs2 X'DD1E0041' as char(2) character set ucs2) as char(2) character set UTF8)) from (values(0))x;
+
+(EXPR)          
+----------------
+
+3F41            
+
+--- 1 row(s) selected.
+>>cqd TRANSLATE_ERROR_UNICODE_TO_UNICODE reset;
+
+--- SQL operation complete.
+>>cqd TRANSLATE_ERROR 'off';
+
+--- SQL operation complete.
+>>select converttohex(TRANSLATE(_ucs2 X'D8340041' using UCS2toUTF8)) from (values(0))x;
+
+(EXPR)      
+------------
+
+3F41        
+
+--- 1 row(s) selected.
+>>select converttohex(TRANSLATE(_ucs2 X'DD1E0041' using UCS2toUTF8)) from (values(0))x;
+
+(EXPR)      
+------------
+
+3F41        
+
+--- 1 row(s) selected.
+>>select converttohex(cast(cast(_ucs2 X'DD1E0041' as char(2) character set ucs2) as char(2) character set UTF8)) from (values(0))x;
+
+(EXPR)          
+----------------
+
+3F41            
+
+--- 1 row(s) selected.
 >>--
 >>select converttohex(LEFT(      x'C2 80 C2 81 C2 82 DF BE DF BF',4)) from (values(0))x;
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/079b2107/core/sql/regress/charsets/TEST314
----------------------------------------------------------------------
diff --git a/core/sql/regress/charsets/TEST314 b/core/sql/regress/charsets/TEST314
index 560f18e..2e487d1 100644
--- a/core/sql/regress/charsets/TEST314
+++ b/core/sql/regress/charsets/TEST314
@@ -113,6 +113,18 @@ select converttohex(UPPER(TRANSLATE(_ucs2 x'0131017F' using UCS2toUTF8))) from (
 select converttohex(TRANSLATE(UPPER(_ucs2 x'0131017F') using UCS2toUTF8)) from (values(0))x;
 select converttohex(UPPER(TRANSLATE(_ucs2 x'017F0131' using UCS2toUTF8))) from (values(0))x;
 select converttohex(TRANSLATE(UPPER(_ucs2 x'017F0131') using UCS2toUTF8)) from (values(0))x;
+-- Invalid UTF-16 surrogate pairs
+select converttohex(TRANSLATE(_ucs2 X'D8340041' using UCS2toUTF8)) from (values(0))x;
+select converttohex(TRANSLATE(_ucs2 X'DD1E0041' using UCS2toUTF8)) from (values(0))x;
+cqd TRANSLATE_ERROR_UNICODE_TO_UNICODE 'off';
+select converttohex(TRANSLATE(_ucs2 X'D8340041' using UCS2toUTF8)) from (values(0))x;
+select converttohex(TRANSLATE(_ucs2 X'DD1E0041' using UCS2toUTF8)) from (values(0))x;
+select converttohex(cast(cast(_ucs2 X'DD1E0041' as char(2) character set ucs2) as char(2) character set UTF8)) from (values(0))x;
+cqd TRANSLATE_ERROR_UNICODE_TO_UNICODE reset;
+cqd TRANSLATE_ERROR 'off';
+select converttohex(TRANSLATE(_ucs2 X'D8340041' using UCS2toUTF8)) from (values(0))x;
+select converttohex(TRANSLATE(_ucs2 X'DD1E0041' using UCS2toUTF8)) from (values(0))x;
+select converttohex(cast(cast(_ucs2 X'DD1E0041' as char(2) character set ucs2) as char(2) character set UTF8)) from (values(0))x;
 --
 select converttohex(LEFT(      x'C2 80 C2 81 C2 82 DF BE DF BF',4)) from (values(0))x;
 select converttohex(LEFT(_utf8 x'C2 80 C2 81 C2 82 DF BE DF BF',4)) from (values(0))x;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/079b2107/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/DefaultConstants.h b/core/sql/sqlcomp/DefaultConstants.h
index 923918a..a930d07 100644
--- a/core/sql/sqlcomp/DefaultConstants.h
+++ b/core/sql/sqlcomp/DefaultConstants.h
@@ -3893,6 +3893,9 @@ enum DefaultConstants
   CSE_COMMON_KEY_PRED_CONTROL,
   CSE_PCT_KEY_COL_PRED_CONTROL,
 
+  TRANSLATE_ERROR,
+  TRANSLATE_ERROR_UNICODE_TO_UNICODE,
+
   // This enum constant must be the LAST one in the list; it's a count,
   // not an Attribute (it's not IN DefaultDefaults; it's the SIZE of it)!
   __NUM_DEFAULT_ATTRIBUTES

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/079b2107/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp
index 709a07a..ba9efdf 100644
--- a/core/sql/sqlcomp/nadefaults.cpp
+++ b/core/sql/sqlcomp/nadefaults.cpp
@@ -3430,6 +3430,8 @@ XDDkwd__(SUBQUERY_UNNESTING,			"ON"),
   DDkwd__(TRAF_USE_REGION_XN,                          "OFF"),
 
   DDkwd__(TRAF_USE_RWRS_FOR_MD_INSERT,                   "ON"),
+  DDkwd__(TRANSLATE_ERROR,                             "ON"),
+  DDkwd__(TRANSLATE_ERROR_UNICODE_TO_UNICODE,          "ON"),
 
   DDkwd__(TRY_DP2_REPARTITION_ALWAYS,		"OFF"),
 


[2/2] incubator-trafodion git commit: Merge [TRAFODION-2477] Invalid characters in translation are ignored PR-986

Posted by hz...@apache.org.
Merge [TRAFODION-2477] Invalid characters in translation are ignored PR-986


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/b19286ba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/b19286ba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/b19286ba

Branch: refs/heads/master
Commit: b19286ba733c8449b4bd258b08746e675ff8c7b2
Parents: 8c5ade7 079b210
Author: Hans Zeller <hz...@apache.org>
Authored: Thu Mar 2 23:41:16 2017 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Thu Mar 2 23:41:16 2017 +0000

----------------------------------------------------------------------
 core/sql/common/conversionLocale.cpp  |  4 +-
 core/sql/exp/exp_function.cpp         | 10 ++++-
 core/sql/exp/exp_function.h           | 20 +++++++--
 core/sql/generator/GenItemFunc.cpp    | 14 +++++-
 core/sql/regress/charsets/EXPECTED314 | 68 ++++++++++++++++++++++++++++++
 core/sql/regress/charsets/TEST314     | 12 ++++++
 core/sql/sqlcomp/DefaultConstants.h   |  3 ++
 core/sql/sqlcomp/nadefaults.cpp       |  2 +
 8 files changed, 125 insertions(+), 8 deletions(-)
----------------------------------------------------------------------