You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/27 18:22:10 UTC

[arrow-datafusion] branch main updated: Support hex string literal (#6767)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 4f2933f93b Support hex string literal (#6767)
4f2933f93b is described below

commit 4f2933f93b2a163eaad1c7db9f596ee670f8fd32
Author: WEI Xikai <Sh...@users.noreply.github.com>
AuthorDate: Wed Jun 28 02:22:05 2023 +0800

    Support hex string literal (#6767)
---
 datafusion/sql/src/expr/value.rs | 69 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs
index 5a64d36330..c34b657181 100644
--- a/datafusion/sql/src/expr/value.rs
+++ b/datafusion/sql/src/expr/value.rs
@@ -40,6 +40,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             Value::Placeholder(param) => {
                 Self::create_placeholder_expr(param, param_data_types)
             }
+            Value::HexStringLiteral(s) => {
+                if let Some(v) = try_decode_hex_literal(&s) {
+                    Ok(lit(v))
+                } else {
+                    Err(DataFusionError::Plan(format!(
+                        "Invalid HexStringLiteral '{s}'"
+                    )))
+                }
+            }
             _ => Err(DataFusionError::Plan(format!(
                 "Unsupported Value '{value:?}'",
             ))),
@@ -357,3 +366,63 @@ fn has_units(val: &str) -> bool {
         || val.ends_with("nanosecond")
         || val.ends_with("nanoseconds")
 }
+
+/// Decodes the hex digits of a hex string literal into raw bytes.
+///
+/// An odd-length input yields its first digit as a byte of its own. Returns
+/// `None` if any byte of `s` is not an ASCII hex digit.
+fn try_decode_hex_literal(s: &str) -> Option<Vec<u8>> {
+    let digits = s.as_bytes();
+    // Split off the unpaired leading digit (if any) so the rest pairs up evenly.
+    let (unpaired, paired) = digits.split_at(digits.len() % 2);
+
+    let mut out = Vec::with_capacity((digits.len() + 1) / 2);
+    if let Some(&lone) = unpaired.first() {
+        out.push(try_decode_hex_char(lone)?);
+    }
+
+    for pair in paired.chunks_exact(2) {
+        let hi = try_decode_hex_char(pair[0])?;
+        let lo = try_decode_hex_char(pair[1])?;
+        out.push((hi << 4) | lo);
+    }
+
+    Some(out)
+}
+
+/// Maps one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value in `0..=15`,
+/// or returns `None` for any other byte.
+const fn try_decode_hex_char(c: u8) -> Option<u8> {
+    let nibble = match c {
+        b'0'..=b'9' => c - b'0',
+        b'a'..=b'f' => c - b'a' + 10,
+        b'A'..=b'F' => c - b'A' + 10,
+        _ => return None,
+    };
+    Some(nibble)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Asserts that decoding `input` produces exactly `expect`.
+    fn check(input: &str, expect: Option<Vec<u8>>) {
+        assert_eq!(try_decode_hex_literal(input), expect);
+    }
+
+    #[test]
+    fn test_decode_hex_literal() {
+        // Well-formed literals, including empty and odd-length inputs.
+        check("", Some(vec![]));
+        check("FF00", Some(vec![255, 0]));
+        check("a00a", Some(vec![160, 10]));
+        check("FF0", Some(vec![15, 240]));
+        check("f", Some(vec![15]));
+
+        // Any non-hex character invalidates the whole literal.
+        for bad in ["FF0X", "X0", "XX", "x"] {
+            check(bad, None);
+        }
+    }
+}