You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2023/05/16 14:15:27 UTC
[arrow] branch main updated: GH-35448: [C++] Fix detection of %z in strptime format (#35449)
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 0980dbe330 GH-35448: [C++] Fix detection of %z in strptime format (#35449)
0980dbe330 is described below
commit 0980dbe330b77cf17fa2a2bb65eba43a992cfd68
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Tue May 16 16:15:20 2023 +0200
GH-35448: [C++] Fix detection of %z in strptime format (#35449)
### Rationale for this change
See gh-35448 for the failing example. The current code in `GetZone` was assuming there was always some character between the `%z` and the preceding `%` code (like a whitespace, or `-` or `/`). That is often not the case with `%z`: in time formats like `00:00+01`, the `+` is part of `%z`, so the format is `%H:%M%z` with no character between `%M` and `%z`.
### Are these changes tested?
Test is added
### Are there any user-facing changes?
The result type will now correctly have a `tz=UTC` parametrization
* Closes: #35448
Authored-by: Joris Van den Bossche <jo...@gmail.com>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
cpp/src/arrow/compute/kernels/scalar_string_test.cc | 14 ++++++++++----
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc | 1 -
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index a98b593732..4581e6377a 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -1895,11 +1895,17 @@ TYPED_TEST(TestStringKernels, StrptimeZoneOffset) {
// N.B. BSD strptime only supports (+/-)HHMM and not the wider range
// of values GNU strptime supports.
std::string input1 = R"(["5/1/2020 +0100", null, "12/11/1900 -0130"])";
- std::string output1 =
+ std::string output =
R"(["2020-04-30T23:00:00.000000", null, "1900-12-11T01:30:00.000000"])";
- StrptimeOptions options("%m/%d/%Y %z", TimeUnit::MICRO, /*error_is_null=*/true);
- this->CheckUnary("strptime", input1, timestamp(TimeUnit::MICRO, "UTC"), output1,
- &options);
+ StrptimeOptions options1("%m/%d/%Y %z", TimeUnit::MICRO, /*error_is_null=*/true);
+ this->CheckUnary("strptime", input1, timestamp(TimeUnit::MICRO, "UTC"), output,
+ &options1);
+
+ // format without whitespace before %z (GH-35448)
+ std::string input2 = R"(["2020-05-01T00:00+0100", null, "1900-12-11T00:00-0130"])";
+ StrptimeOptions options2("%Y-%m-%dT%H:%M%z", TimeUnit::MICRO, /*error_is_null=*/true);
+ this->CheckUnary("strptime", input2, timestamp(TimeUnit::MICRO, "UTC"), output,
+ &options2);
}
TYPED_TEST(TestStringKernels, StrptimeDoesNotProvideDefaultOptions) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
index 3addaf6863..a88ce38936 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
@@ -1237,7 +1237,6 @@ const std::string GetZone(const std::string& format) {
zone = "UTC";
break;
}
- cur++;
} else {
count = 0;
}