You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by se...@apache.org on 2019/02/01 01:50:58 UTC

[trafodion] branch master updated: [TRAFODION-3265] INSERT values to VARCHAR column of UTF8 charset can be extremely slow

This is an automated email from the ASF dual-hosted git repository.

selva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafodion.git


The following commit(s) were added to refs/heads/master by this push:
     new 922158e  [TRAFODION-3265] INSERT values to VARCHAR column of UTF8 charset can be extremely slow
     new a3a0e7e  Merge pull request #1785 from zhenxingh/traf-3265-insert-varchar-utf8-slow
922158e is described below

commit 922158e694ae38f8b1ef5c75e1013acd295a2921
Author: He Zhenxing <zh...@esgyn.cn>
AuthorDate: Wed Jan 30 15:33:20 2019 +0800

    [TRAFODION-3265] INSERT values to VARCHAR column of UTF8 charset can be extremely slow
---
 core/sql/optimizer/ItemExpr.cpp             | 14 +++----
 core/sql/regress/charsets/EXPECTED3265      | 44 ++++++++++++++++++++
 core/sql/regress/charsets/FILTER3265        | 30 ++++++++++++++
 core/sql/regress/charsets/TEST3265          | 62 +++++++++++++++++++++++++++++
 core/sql/regress/tools/runregr_charsets.ksh |  2 +-
 5 files changed, 142 insertions(+), 10 deletions(-)

diff --git a/core/sql/optimizer/ItemExpr.cpp b/core/sql/optimizer/ItemExpr.cpp
index 0071cc2..988e8d8 100644
--- a/core/sql/optimizer/ItemExpr.cpp
+++ b/core/sql/optimizer/ItemExpr.cpp
@@ -10984,20 +10984,16 @@ const NAString ConstValue::getText() const
 {
   if(getType()->getTypeQualifier() == NA_CHARACTER_TYPE)
     {
-      NAString result(CmpCommon::statementHeap());
-      if (!textIsValidatedSQLLiteralInUTF8_)
-        result += "\'";
-      if (text_) result += *text_;
-      if (!textIsValidatedSQLLiteralInUTF8_)
-        result += "\'";
+      NAString result = getTextForQuery(QUERY_FORMAT);
 
       // Change imbedded NULL and \377 chars to \0 and \377
       // This comes up in key values quite often.
-      size_t index;
-      while((index = result.first('\0')) != NA_NPOS
+      size_t index = 0;
+      while((index = result.first('\0', index)) != NA_NPOS
 	    && index != result.length())
 	result(index,1) = "\\0";
-      while((index = result.first('\377')) != NA_NPOS
+      index = 0;
+      while((index = result.first('\377', index)) != NA_NPOS
 	    && index != result.length())
 	result(index,1) = "\\377";
 
diff --git a/core/sql/regress/charsets/EXPECTED3265 b/core/sql/regress/charsets/EXPECTED3265
new file mode 100644
index 0000000..c4c1473
--- /dev/null
+++ b/core/sql/regress/charsets/EXPECTED3265
@@ -0,0 +1,44 @@
+>>
+>>obey test3265(testutf8);
+>>
+>>create table cs3265t1 (a varchar(65535) character set utf8);
+
+--- SQL operation complete.
+>>
+>>set terminal_charset utf8;
+>>
+>>-- The first insert will load meta data, which may take more than a
+>>-- second to finish. So we do the real test as the second insert to
+>>-- make sure it can be finished within a second.
+>>insert into cs3265t1 values ('a');
+
+--- 1 row(s) inserted.
+>>
+>>-- Turn on statistics to record the execution time for the insert
+>>-- below, the filter script will filter out the fraction part, which
+>>-- means this will only pass if the insert can be finished within a
+>>-- second.
+>>set statistics on;
+>>insert into cs3265t1 values ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+
+--- 1 row(s) inserted.
+
+Start Time             2019/01/30 16:34:01.940529
+End Time               2019/01/30 16:34:02.108767
+Elapsed Time                      00:00:00.168238
+Compile Time                      00:00:00.164444
+Execution Time                    00:00:00.003728
+
+
+--- SQL operation complete.
+>>
+>>set statistics off;
+>>
+>>
+>>obey test3265(clnup);
+>>
+>>drop table if exists cs3265t1;
+
+--- SQL operation complete.
+>>
+>>log;
diff --git a/core/sql/regress/charsets/FILTER3265 b/core/sql/regress/charsets/FILTER3265
new file mode 100755
index 0000000..78af0b8
--- /dev/null
+++ b/core/sql/regress/charsets/FILTER3265
@@ -0,0 +1,30 @@
+#! /bin/sh
+# @@@ START COPYRIGHT @@@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# @@@ END COPYRIGHT @@@
+
+# Like the pattern-masking of QACOMP, this filter masks run-dependent
+# output: full timestamps and the fractional part of sub-second elapsed
+# times.  Called by the runregr script before doing the diff.
+
+sed "
+s/[0-9][0-9][0-9][0-9]\/[0-9][0-9]\/[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9][.][0-9][0-9][0-9][0-9][0-9][0-9]/yyyy\/mm\/dd hh:mm:ss.xxxxxx/g
+s/00:00:00[.][0-9][0-9][0-9][0-9][0-9][0-9]/00:00:00.xxxxxx/g
+" "$1"
diff --git a/core/sql/regress/charsets/TEST3265 b/core/sql/regress/charsets/TEST3265
new file mode 100644
index 0000000..f65e40b
--- /dev/null
+++ b/core/sql/regress/charsets/TEST3265
@@ -0,0 +1,62 @@
+-- -*- mode: sql; coding: utf-8 -*-
+-- Test: TEST3265
+-- Functionality: This is the regression test for TRAFODION-3265
+-- Expected files:   EXPECTED3265
+-- Tables created:   cs3265t*
+-- Limitations: None
+--
+-- @@@ START COPYRIGHT @@@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+--
+-- @@@ END COPYRIGHT @@@
+
+obey test3265(clnup);
+
+log log3265 clear;
+
+obey test3265(testutf8);
+
+obey test3265(clnup);
+
+log;
+exit;
+
+?section testutf8
+
+create table cs3265t1 (a varchar(65535) character set utf8);
+
+set terminal_charset utf8;
+
+-- The first insert will load meta data, which may take more than a
+-- second to finish. So we do the real test as the second insert to
+-- make sure it can be finished within a second.
+insert into cs3265t1 values ('a');
+
+-- Turn on statistics to record the execution time for the insert
+-- below, the filter script will filter out the fraction part, which
+-- means this will only pass if the insert can be finished within a
+-- second.
+set statistics on;
+insert into cs3265t1 values ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+
+set statistics off;
+
+?section clnup
+
+drop table if exists cs3265t1;
diff --git a/core/sql/regress/tools/runregr_charsets.ksh b/core/sql/regress/tools/runregr_charsets.ksh
index d94be75..5ec6af9 100755
--- a/core/sql/regress/tools/runregr_charsets.ksh
+++ b/core/sql/regress/tools/runregr_charsets.ksh
@@ -183,7 +183,7 @@ fi
 
 # sbtestfiles contains the list of tests to be run in seabase mode
 if [ "$seabase" -ne 0 ]; then
-  sbtestfiles="TEST001 TEST002 TEST003 TEST004 TEST008 TEST009 TEST010 TEST012 TEST014 TEST310 TEST311 TEST312 TEST313 TEST314 TEST315" 
+  sbtestfiles="TEST001 TEST002 TEST003 TEST004 TEST008 TEST009 TEST010 TEST012 TEST014 TEST310 TEST311 TEST312 TEST313 TEST314 TEST315 TEST3265" 
   sbprettyfiles=
   for i in $prettyfiles; do
     for j in $sbtestfiles; do