You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "jackwener (via GitHub)" <gi...@apache.org> on 2023/06/26 13:14:12 UTC

[GitHub] [arrow-datafusion] jackwener commented on a diff in pull request #6767: Support hex string literal

jackwener commented on code in PR #6767:
URL: https://github.com/apache/arrow-datafusion/pull/6767#discussion_r1242172439


##########
datafusion/sql/src/expr/value.rs:
##########
@@ -357,3 +366,63 @@ fn has_units(val: &str) -> bool {
         || val.ends_with("nanosecond")
         || val.ends_with("nanoseconds")
 }
+
+/// Try to decode bytes from hex literal string.
+///
+/// None will be returned if the input literal is hex-invalid.
+fn try_decode_hex_literal(s: &str) -> Option<Vec<u8>> {
+    let hex_bytes = s.as_bytes();
+
+    let mut decoded_bytes = Vec::with_capacity((hex_bytes.len() + 1) / 2);
+
+    let start_idx = hex_bytes.len() % 2;
+    if start_idx > 0 {
+        // The first byte is formed of only one char.
+        decoded_bytes.push(try_decode_hex_char(hex_bytes[0])?);
+    }
+
+    for i in (start_idx..hex_bytes.len()).step_by(2) {
+        let high = try_decode_hex_char(hex_bytes[i])?;
+        let low = try_decode_hex_char(hex_bytes[i + 1])?;
+        decoded_bytes.push(high << 4 | low);
+    }
+
+    Some(decoded_bytes)

Review Comment:
   ```suggestion
       Ok(decoded_bytes)
   ```



##########
datafusion/sql/src/expr/value.rs:
##########
@@ -357,3 +366,63 @@ fn has_units(val: &str) -> bool {
         || val.ends_with("nanosecond")
         || val.ends_with("nanoseconds")
 }
+
+/// Try to decode bytes from hex literal string.
+///
+/// None will be returned if the input literal is hex-invalid.
+fn try_decode_hex_literal(s: &str) -> Option<Vec<u8>> {

Review Comment:
   ```suggestion
   fn try_decode_hex_literal(s: &str) -> Result<Vec<u8>> {
   ```



##########
datafusion/sql/src/expr/value.rs:
##########
@@ -357,3 +366,63 @@ fn has_units(val: &str) -> bool {
         || val.ends_with("nanosecond")
         || val.ends_with("nanoseconds")
 }
+
+/// Try to decode bytes from hex literal string.
+///
+/// None will be returned if the input literal is hex-invalid.
+fn try_decode_hex_literal(s: &str) -> Option<Vec<u8>> {
+    let hex_bytes = s.as_bytes();
+
+    let mut decoded_bytes = Vec::with_capacity((hex_bytes.len() + 1) / 2);
+
+    let start_idx = hex_bytes.len() % 2;
+    if start_idx > 0 {
+        // The first byte is formed of only one char.
+        decoded_bytes.push(try_decode_hex_char(hex_bytes[0])?);
+    }
+
+    for i in (start_idx..hex_bytes.len()).step_by(2) {
+        let high = try_decode_hex_char(hex_bytes[i])?;
+        let low = try_decode_hex_char(hex_bytes[i + 1])?;
+        decoded_bytes.push(high << 4 | low);
+    }
+
+    Some(decoded_bytes)
+}
+
+/// Try to decode a byte from a hex char.
+///
+/// None will be returned if the input char is hex-invalid.
+const fn try_decode_hex_char(c: u8) -> Option<u8> {
+    match c {
+        b'A'..=b'F' => Some(c - b'A' + 10),
+        b'a'..=b'f' => Some(c - b'a' + 10),
+        b'0'..=b'9' => Some(c - b'0'),
+        _ => None,
+    }
+}

Review Comment:
   ```suggestion
   fn try_decode_hex_char(c: u8) -> Result<u8> {
       match c {
           b'A'..=b'F' => Ok(c - b'A' + 10),
           b'a'..=b'f' => Ok(c - b'a' + 10),
           b'0'..=b'9' => Ok(c - b'0'),
           _ => Err(DataFusionError::Plan(format!(
               "Invalid hex character: {}",
               c as char
           ))),
       }
   }
   ```



##########
datafusion/sql/src/expr/value.rs:
##########
@@ -357,3 +366,63 @@ fn has_units(val: &str) -> bool {
         || val.ends_with("nanosecond")
         || val.ends_with("nanoseconds")
 }
+
+/// Try to decode bytes from hex literal string.
+///
+/// None will be returned if the input literal is hex-invalid.
+fn try_decode_hex_literal(s: &str) -> Option<Vec<u8>> {
+    let hex_bytes = s.as_bytes();
+
+    let mut decoded_bytes = Vec::with_capacity((hex_bytes.len() + 1) / 2);
+
+    let start_idx = hex_bytes.len() % 2;
+    if start_idx > 0 {
+        // The first byte is formed of only one char.
+        decoded_bytes.push(try_decode_hex_char(hex_bytes[0])?);
+    }
+
+    for i in (start_idx..hex_bytes.len()).step_by(2) {
+        let high = try_decode_hex_char(hex_bytes[i])?;
+        let low = try_decode_hex_char(hex_bytes[i + 1])?;
+        decoded_bytes.push(high << 4 | low);
+    }
+
+    Some(decoded_bytes)
+}
+
+/// Try to decode a byte from a hex char.
+///
+/// None will be returned if the input char is hex-invalid.
+const fn try_decode_hex_char(c: u8) -> Option<u8> {
+    match c {
+        b'A'..=b'F' => Some(c - b'A' + 10),
+        b'a'..=b'f' => Some(c - b'a' + 10),
+        b'0'..=b'9' => Some(c - b'0'),
+        _ => None,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_decode_hex_literal() {
+        let cases = [
+            ("", Some(vec![])),
+            ("FF00", Some(vec![255, 0])),
+            ("a00a", Some(vec![160, 10])),
+            ("FF0", Some(vec![15, 240])),
+            ("f", Some(vec![15])),
+            ("FF0X", None),
+            ("X0", None),
+            ("XX", None),
+            ("x", None),
+        ];
+
+        for (input, expect) in cases {
+            let output = try_decode_hex_literal(input);

Review Comment:
   ```suggestion
               let output = try_decode_hex_literal(input).map_or(None, |v| Some(v));
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org