You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by se...@apache.org on 2019/02/01 01:50:58 UTC
[trafodion] branch master updated: [TRAFODION-3265] INSERT values
to VARCHAR column of UTF8 charset can be extremely slow
This is an automated email from the ASF dual-hosted git repository.
selva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafodion.git
The following commit(s) were added to refs/heads/master by this push:
new 922158e [TRAFODION-3265] INSERT values to VARCHAR column of UTF8 charset can be extremely slow
new a3a0e7e Merge pull request #1785 from zhenxingh/traf-3265-insert-varchar-utf8-slow
922158e is described below
commit 922158e694ae38f8b1ef5c75e1013acd295a2921
Author: He Zhenxing <zh...@esgyn.cn>
AuthorDate: Wed Jan 30 15:33:20 2019 +0800
[TRAFODION-3265] INSERT values to VARCHAR column of UTF8 charset can be extremely slow
---
core/sql/optimizer/ItemExpr.cpp | 14 +++----
core/sql/regress/charsets/EXPECTED3265 | 44 ++++++++++++++++++++
core/sql/regress/charsets/FILTER3265 | 30 ++++++++++++++
core/sql/regress/charsets/TEST3265 | 62 +++++++++++++++++++++++++++++
core/sql/regress/tools/runregr_charsets.ksh | 2 +-
5 files changed, 142 insertions(+), 10 deletions(-)
diff --git a/core/sql/optimizer/ItemExpr.cpp b/core/sql/optimizer/ItemExpr.cpp
index 0071cc2..988e8d8 100644
--- a/core/sql/optimizer/ItemExpr.cpp
+++ b/core/sql/optimizer/ItemExpr.cpp
@@ -10984,20 +10984,16 @@ const NAString ConstValue::getText() const
{
if(getType()->getTypeQualifier() == NA_CHARACTER_TYPE)
{
- NAString result(CmpCommon::statementHeap());
- if (!textIsValidatedSQLLiteralInUTF8_)
- result += "\'";
- if (text_) result += *text_;
- if (!textIsValidatedSQLLiteralInUTF8_)
- result += "\'";
+ NAString result = getTextForQuery(QUERY_FORMAT);
// Change imbedded NULL and \377 chars to \0 and \377
// This comes up in key values quite often.
- size_t index;
- while((index = result.first('\0')) != NA_NPOS
+ size_t index = 0;
+ while((index = result.first('\0', index)) != NA_NPOS
&& index != result.length())
result(index,1) = "\\0";
- while((index = result.first('\377')) != NA_NPOS
+ index = 0;
+ while((index = result.first('\377', index)) != NA_NPOS
&& index != result.length())
result(index,1) = "\\377";
diff --git a/core/sql/regress/charsets/EXPECTED3265 b/core/sql/regress/charsets/EXPECTED3265
new file mode 100644
index 0000000..c4c1473
--- /dev/null
+++ b/core/sql/regress/charsets/EXPECTED3265
@@ -0,0 +1,44 @@
+>>
+>>obey test3265(testutf8);
+>>
+>>create table cs3265t1 (a varchar(65535) character set utf8);
+
+--- SQL operation complete.
+>>
+>>set terminal_charset utf8;
+>>
+>>-- The first insert will load meta data, which may take more than a
+>>-- second to finish. So we do the real test as the second insert to
+>>-- make sure it can be finished within a second.
+>>insert into cs3265t1 values ('a');
+
+--- 1 row(s) inserted.
+>>
+>>-- Turn on statistics to record the execution time for the insert
+>>-- below, the filter script will filter out the fraction part, which
+>>-- means this will only pass if the insert can be finished within a
+>>-- second.
+>>set statistics on;
+>>insert into cs3265t1 values ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+
+--- 1 row(s) inserted.
+
+Start Time 2019/01/30 16:34:01.940529
+End Time 2019/01/30 16:34:02.108767
+Elapsed Time 00:00:00.168238
+Compile Time 00:00:00.164444
+Execution Time 00:00:00.003728
+
+
+--- SQL operation complete.
+>>
+>>set statistics off;
+>>
+>>
+>>obey test3265(clnup);
+>>
+>>drop table if exists cs3265t1;
+
+--- SQL operation complete.
+>>
+>>log;
diff --git a/core/sql/regress/charsets/FILTER3265 b/core/sql/regress/charsets/FILTER3265
new file mode 100755
index 0000000..78af0b8
--- /dev/null
+++ b/core/sql/regress/charsets/FILTER3265
@@ -0,0 +1,30 @@
+#! /bin/sh
+# @@@ START COPYRIGHT @@@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# @@@ END COPYRIGHT @@@
+
+# Like the pattern-masking of QACOMP, this script masks run-dependent text:
+# timestamps, and the fractional part of durations shorter than one second
+# (longer durations stay unmasked). Called by the runregr script before diff.
+
+sed "
+s/[0-9][0-9][0-9][0-9]\/[0-9][0-9]\/[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9][.][0-9][0-9][0-9][0-9][0-9][0-9]/yyyy\/mm\/dd hh:mm:ss.xxxxxx/g
+s/00:00:00[.][0-9][0-9][0-9][0-9][0-9][0-9]/00:00:00.xxxxxx/g
+" "$1"
diff --git a/core/sql/regress/charsets/TEST3265 b/core/sql/regress/charsets/TEST3265
new file mode 100644
index 0000000..f65e40b
--- /dev/null
+++ b/core/sql/regress/charsets/TEST3265
@@ -0,0 +1,62 @@
+-- -*- mode: sql; coding: utf-8 -*-
+-- Test: TEST3265
+-- Functionality: This is the regression test for TRAFODION-3265
+-- Expected files: EXPECTED3265
+-- Tables created: cs3265t*
+-- Limitations: None
+--
+-- @@@ START COPYRIGHT @@@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+--
+-- @@@ END COPYRIGHT @@@
+
+obey test3265(clnup);
+
+log log3265 clear;
+
+obey test3265(testutf8);
+
+obey test3265(clnup);
+
+log;
+exit;
+
+?section testutf8
+
+create table cs3265t1 (a varchar(65535) character set utf8);
+
+set terminal_charset utf8;
+
+-- The first insert will load meta data, which may take more than a
+-- second to finish. So we do the real test as the second insert to
+-- make sure it can be finished within a second.
+insert into cs3265t1 values ('a');
+
+-- Turn on statistics to record the execution time for the insert
+-- below, the filter script will filter out the fraction part, which
+-- means this will only pass if the insert can be finished within a
+-- second.
+set statistics on;
+insert into cs3265t1 values ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+
+set statistics off;
+
+?section clnup
+
+drop table if exists cs3265t1;
diff --git a/core/sql/regress/tools/runregr_charsets.ksh b/core/sql/regress/tools/runregr_charsets.ksh
index d94be75..5ec6af9 100755
--- a/core/sql/regress/tools/runregr_charsets.ksh
+++ b/core/sql/regress/tools/runregr_charsets.ksh
@@ -183,7 +183,7 @@ fi
# sbtestfiles contains the list of tests to be run in seabase mode
if [ "$seabase" -ne 0 ]; then
- sbtestfiles="TEST001 TEST002 TEST003 TEST004 TEST008 TEST009 TEST010 TEST012 TEST014 TEST310 TEST311 TEST312 TEST313 TEST314 TEST315"
+ sbtestfiles="TEST001 TEST002 TEST003 TEST004 TEST008 TEST009 TEST010 TEST012 TEST014 TEST310 TEST311 TEST312 TEST313 TEST314 TEST315 TEST3265"
sbprettyfiles=
for i in $prettyfiles; do
for j in $sbtestfiles; do