You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/02/05 16:43:36 UTC

[impala] 03/03: IMPALA-9280: Fix parsing of timestamp with dash before TZH

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit d3e90af1dbba3175e31956224f89f9a33e1916cb
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Wed Jan 29 10:23:36 2020 -0800

    IMPALA-9280: Fix parsing of timestamp with dash before TZH
    
    In the ISO SQL datetime format, '-' is both a separator character and
    the sign of the TZH token. If the TZH token itself doesn't start with a
    '+' sign and the separator sequence contains more than one separator,
    then the last '-' of the separator sequence is taken as the sign of TZH.
    
    Testing:
    * Add test to query_test/test_cast_with_format.py
    
    Change-Id: I24c636e75fd380f6ebd091bcb38bb60a274f0e00
    Reviewed-on: http://gerrit.cloudera.org:8080/15130
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/runtime/datetime-iso-sql-format-parser.cc | 7 ++++++-
 tests/query_test/test_cast_with_format.py        | 4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/be/src/runtime/datetime-iso-sql-format-parser.cc b/be/src/runtime/datetime-iso-sql-format-parser.cc
index 2069256..f07d576 100644
--- a/be/src/runtime/datetime-iso-sql-format-parser.cc
+++ b/be/src/runtime/datetime-iso-sql-format-parser.cc
@@ -293,6 +293,7 @@ bool IsoSqlFormatParser::ProcessSeparatorSequence(const char** current_pos,
   DCHECK(current_tok_idx != nullptr && *current_tok_idx < dt_ctx.toks.size());
   DCHECK(dt_ctx.toks[*current_tok_idx].type == SEPARATOR);
   if (!IsoSqlFormatTokenizer::IsSeparator(current_pos, end_pos, false)) return false;
+  const char* begin_pos = *current_pos;
   // Advance to the end of the separator sequence.
   ++(*current_pos);
   while (*current_pos < end_pos &&
@@ -320,7 +321,11 @@ bool IsoSqlFormatParser::ProcessSeparatorSequence(const char** current_pos,
   }
 
   // The last '-' of a separator sequence might be taken as a sign for timezone hour.
-  if (*(*current_pos - 1) == '-' && dt_ctx.toks[*current_tok_idx].type == TIMEZONE_HOUR) {
+  // If the TZH token itself doesn't start with a '+' sign and the separator sequence
+  // contains more than one separators then the last '-' of the separator sequence is
+  // taken as the sign of TZH.
+  if (*(*current_pos - 1) == '-' && dt_ctx.toks[*current_tok_idx].type == TIMEZONE_HOUR
+      && *(*current_pos) != '+' && begin_pos != (*current_pos - 1)) {
     --(*current_pos);
   }
   return true;
diff --git a/tests/query_test/test_cast_with_format.py b/tests/query_test/test_cast_with_format.py
index f584bf6..9dbb102 100644
--- a/tests/query_test/test_cast_with_format.py
+++ b/tests/query_test/test_cast_with_format.py
@@ -1160,6 +1160,10 @@ class TestCastWithFormat(ImpalaTestSuite):
         "timestamp FORMAT 'YYYY-MM-DD HH12:MI A.M.TZH:TZM')")
     assert result.data == ["2018-12-31 08:00:00"]
 
+    result = self.client.execute("select cast('2018-12-31 08:00 AM-+1:10' as "
+        "timestamp FORMAT 'YYYY-MM-DD HH12:MI A.M. TZH:TZM')")
+    assert result.data == ["2018-12-31 08:00:00"]
+
     # Invalid TZH and TZM
     result = self.client.execute("select cast('2016-01-01 10:00 AM +16:00' as "
         "timestamp FORMAT 'YYYY-MM-DD HH12:MI A.M. TZH:TZM')")