You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2023/05/16 14:15:27 UTC

[arrow] branch main updated: GH-35448: [C++] Fix detection of %z in strptime format (#35449)

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 0980dbe330 GH-35448: [C++] Fix detection of %z in strptime format (#35449)
0980dbe330 is described below

commit 0980dbe330b77cf17fa2a2bb65eba43a992cfd68
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Tue May 16 16:15:20 2023 +0200

    GH-35448: [C++] Fix detection of %z in strptime format (#35449)
    
    ### Rationale for this change
    
    See GH-35448 for the failing example. The code in `GetZone` assumed there was always some character between `%z` and the preceding `%` code (such as whitespace, `-`, or `/`). That is often not the case with `%z`: in time formats like `00:00+01`, the `+` is part of `%z`, so the format is `%H:%M%z`, with no character between `%M` and `%z`.
    
    ### Are these changes tested?
    
    A test is added.
    
    ### Are there any user-facing changes?
    
    The result type will now correctly have a `tz=UTC` parametrization.
    * Closes: #35448
    
    Authored-by: Joris Van den Bossche <jo...@gmail.com>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/src/arrow/compute/kernels/scalar_string_test.cc    | 14 ++++++++++----
 cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc |  1 -
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index a98b593732..4581e6377a 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -1895,11 +1895,17 @@ TYPED_TEST(TestStringKernels, StrptimeZoneOffset) {
   // N.B. BSD strptime only supports (+/-)HHMM and not the wider range
   // of values GNU strptime supports.
   std::string input1 = R"(["5/1/2020 +0100", null, "12/11/1900 -0130"])";
-  std::string output1 =
+  std::string output =
       R"(["2020-04-30T23:00:00.000000", null, "1900-12-11T01:30:00.000000"])";
-  StrptimeOptions options("%m/%d/%Y %z", TimeUnit::MICRO, /*error_is_null=*/true);
-  this->CheckUnary("strptime", input1, timestamp(TimeUnit::MICRO, "UTC"), output1,
-                   &options);
+  StrptimeOptions options1("%m/%d/%Y %z", TimeUnit::MICRO, /*error_is_null=*/true);
+  this->CheckUnary("strptime", input1, timestamp(TimeUnit::MICRO, "UTC"), output,
+                   &options1);
+
+  // format without whitespace before %z (GH-35448)
+  std::string input2 = R"(["2020-05-01T00:00+0100", null, "1900-12-11T00:00-0130"])";
+  StrptimeOptions options2("%Y-%m-%dT%H:%M%z", TimeUnit::MICRO, /*error_is_null=*/true);
+  this->CheckUnary("strptime", input2, timestamp(TimeUnit::MICRO, "UTC"), output,
+                   &options2);
 }
 
 TYPED_TEST(TestStringKernels, StrptimeDoesNotProvideDefaultOptions) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
index 3addaf6863..a88ce38936 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
@@ -1237,7 +1237,6 @@ const std::string GetZone(const std::string& format) {
         zone = "UTC";
         break;
       }
-      cur++;
     } else {
       count = 0;
     }