You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "tustvold (via GitHub)" <gi...@apache.org> on 2023/03/16 19:00:49 UTC

[GitHub] [arrow-rs] tustvold commented on a diff in pull request #3860: Faster time parsing (~93% faster)

tustvold commented on code in PR #3860:
URL: https://github.com/apache/arrow-rs/pull/3860#discussion_r1139239408


##########
arrow-cast/src/parse.rs:
##########
@@ -291,79 +292,118 @@ fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
 /// This function does not support parsing strings with a timezone
 /// or offset specified, as it considers only time since midnight.
 pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
-    // colon count, presence of decimal, presence of whitespace
-    fn preprocess_time_string(string: &str) -> (usize, bool, bool) {
-        string
-            .as_bytes()
-            .iter()
-            .fold((0, false, false), |tup, char| match char {
-                b':' => (tup.0 + 1, tup.1, tup.2),
-                b'.' => (tup.0, true, tup.2),
-                b' ' => (tup.0, tup.1, true),
-                _ => tup,
-            })
+    let nt = string_to_time(s).ok_or_else(|| {
+        ArrowError::ParseError(format!("Failed to parse \'{s}\' as time"))
+    })?;
+    Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
+}
+
+fn string_to_time(s: &str) -> Option<NaiveTime> {
+    let bytes = s.as_bytes();
+    if bytes.len() < 4 {
+        return None;
     }
 
-    // Do a preprocess pass of the string to prune which formats to attempt parsing for
-    let formats: &[&str] = match preprocess_time_string(s.trim()) {
-        // 24-hour clock, with hour, minutes, seconds and fractions of a second specified
-        // Examples:
-        // * 09:50:12.123456789
-        // *  9:50:12.123456789
-        (2, true, false) => &["%H:%M:%S%.f", "%k:%M:%S%.f"],
-
-        // 12-hour clock, with hour, minutes, seconds and fractions of a second specified
-        // Examples:
-        // * 09:50:12.123456789 PM
-        // * 09:50:12.123456789 pm
-        // *  9:50:12.123456789 AM
-        // *  9:50:12.123456789 am
-        (2, true, true) => &[
-            "%I:%M:%S%.f %P",
-            "%I:%M:%S%.f %p",
-            "%l:%M:%S%.f %P",
-            "%l:%M:%S%.f %p",
-        ],
-
-        // 24-hour clock, with hour, minutes and seconds specified
-        // Examples:
-        // * 09:50:12
-        // *  9:50:12
-        (2, false, false) => &["%H:%M:%S", "%k:%M:%S"],
-
-        // 12-hour clock, with hour, minutes and seconds specified
-        // Examples:
-        // * 09:50:12 PM
-        // * 09:50:12 pm
-        // *  9:50:12 AM
-        // *  9:50:12 am
-        (2, false, true) => &["%I:%M:%S %P", "%I:%M:%S %p", "%l:%M:%S %P", "%l:%M:%S %p"],
-
-        // 24-hour clock, with hour and minutes specified
-        // Examples:
-        // * 09:50
-        // *  9:50
-        (1, false, false) => &["%H:%M", "%k:%M"],
-
-        // 12-hour clock, with hour and minutes specified
-        // Examples:
-        // * 09:50 PM
-        // * 09:50 pm
-        // *  9:50 AM
-        // *  9:50 am
-        (1, false, true) => &["%I:%M %P", "%I:%M %p", "%l:%M %P", "%l:%M %p"],
-
-        _ => &[],
+    let (am, bytes) = match bytes.get(bytes.len() - 3..) {
+        Some(b" AM") => (Some(true), &bytes[..bytes.len() - 3]),

Review Comment:
   Turns out this is yet another case of chrono behaving differently from what its documentation says. Will fix.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org