You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "aprimadi (via GitHub)" <gi...@apache.org> on 2023/05/18 08:54:28 UTC

[GitHub] [arrow-datafusion] aprimadi commented on a diff in pull request #6355: Add` COPY .. TO ..` syntax support

aprimadi commented on code in PR #6355:
URL: https://github.com/apache/arrow-datafusion/pull/6355#discussion_r1197486690


##########
datafusion/sql/src/parser.rs:
##########
@@ -242,6 +334,75 @@ impl<'a> DFParser<'a> {
         }))
     }
 
+    /// Parse a SQL `COPY TO` statement
+    pub fn parse_copy(&mut self) -> Result<Statement, ParserError> {
+        // parse as a query
+        let source = if self.parser.consume_token(&Token::LParen) {
+            let query = self.parser.parse_query()?;
+            self.parser.expect_token(&Token::RParen)?;
+            CopyToSource::Query(query)
+        } else {
+            // parse as table reference
+            let table_name = self.parser.parse_object_name()?;
+            CopyToSource::Relation(table_name)
+        };
+
+        self.parser.expect_keyword(Keyword::TO)?;
+
+        let target = self.parser.parse_literal_string()?;
+
+        // check for options in parens
+        let options = if self.parser.peek_token().token == Token::LParen {
+            self.parse_value_options()?
+        } else {
+            HashMap::new()
+        };
+
+        Ok(Statement::CopyTo(CopyToStatement {
+            source,
+            target,
+            options,
+        }))
+    }
+
+    /// Parse the next token as a key name for an option list
+    ///
+    /// Note this is different than [`parse_literal_string`]
+    /// because it it allows keywords as well as other non words

Review Comment:
   ```suggestion
       /// because it allows keywords as well as other non words
   ```



##########
datafusion/core/src/execution/context.rs:
##########
@@ -1649,45 +1653,58 @@ impl SessionState {
         // table providers for all relations referenced in this query
         let mut relations = hashbrown::HashSet::with_capacity(10);
 
-        match statement {
-            DFStatement::Statement(s) => {
-                struct RelationVisitor<'a>(&'a mut hashbrown::HashSet<ObjectName>);
+        struct RelationVisitor<'a>(&'a mut hashbrown::HashSet<ObjectName>);
+
+        impl<'a> RelationVisitor<'a> {
+            /// Record that `relation` was used in this statement
+            fn insert(&mut self, relation: &ObjectName) {

Review Comment:
   Nice refactor



##########
datafusion/sql/src/parser.rs:
##########
@@ -242,6 +334,75 @@ impl<'a> DFParser<'a> {
         }))
     }
 
+    /// Parse a SQL `COPY TO` statement
+    pub fn parse_copy(&mut self) -> Result<Statement, ParserError> {
+        // parse as a query
+        let source = if self.parser.consume_token(&Token::LParen) {
+            let query = self.parser.parse_query()?;
+            self.parser.expect_token(&Token::RParen)?;
+            CopyToSource::Query(query)
+        } else {
+            // parse as table reference
+            let table_name = self.parser.parse_object_name()?;
+            CopyToSource::Relation(table_name)
+        };
+
+        self.parser.expect_keyword(Keyword::TO)?;
+
+        let target = self.parser.parse_literal_string()?;
+
+        // check for options in parens
+        let options = if self.parser.peek_token().token == Token::LParen {
+            self.parse_value_options()?
+        } else {
+            HashMap::new()
+        };
+
+        Ok(Statement::CopyTo(CopyToStatement {
+            source,
+            target,
+            options,
+        }))
+    }
+
+    /// Parse the next token as a key name for an option list
+    ///
+    /// Note this is different than [`parse_literal_string`]
+    /// because it it allows keywords as well as other non words
+    ///
+    /// [`parse_literal_string`]: sqlparser::parser::Parser::parse_literal_string
+    pub fn parse_option_key(&mut self) -> Result<String, ParserError> {
+        let next_token = self.parser.next_token();
+        match next_token.token {
+            Token::Word(Word { value, .. }) => Ok(value),
+            Token::SingleQuotedString(s) => Ok(s),
+            Token::DoubleQuotedString(s) => Ok(s),
+            Token::EscapedStringLiteral(s) => Ok(s),
+            _ => self.parser.expected("key name", next_token),
+        }
+    }
+
+    /// Parse the next token as a value for an option list
+    ///
+    /// Note this is different than [`parse_value`] as it allows any
+    /// word or keyword in this location.
+    ///
+    /// [`parse_value`]: sqlparser::parser::Parser::parse_value
+    pub fn parse_option_value(&mut self) -> Result<Value, ParserError> {
+        let next_token = self.parser.next_token();
+        match next_token.token {
+            Token::Word(Word { value, .. }) => Ok(Value::UnQuotedString(value)),
+            Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
+            Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
+            Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
+            Token::Number(ref n, l) => match n.parse() {
+                Ok(n) => Ok(Value::Number(n, l)),
+                Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}")),
+            },

Review Comment:
   Just a question: what is these lines of code trying to do?
   
   I might be wrong about this but it seems that n in Token::number(ref n, l) is already guaranteed by the tokenizer to be a number.



##########
datafusion/sql/src/parser.rs:
##########
@@ -513,14 +674,42 @@ impl<'a> DFParser<'a> {
         }
     }
 
-    fn parse_options(&mut self) -> Result<HashMap<String, String>, ParserError> {
-        let mut options: HashMap<String, String> = HashMap::new();
+    /// Parses (key value) style options where the values are literal strings.
+    fn parse_string_options(&mut self) -> Result<HashMap<String, String>, ParserError> {
+        let mut options = HashMap::new();
         self.parser.expect_token(&Token::LParen)?;
 
         loop {
             let key = self.parser.parse_literal_string()?;
             let value = self.parser.parse_literal_string()?;
-            options.insert(key.to_string(), value.to_string());
+            options.insert(key, value);
+            let comma = self.parser.consume_token(&Token::Comma);
+            if self.parser.consume_token(&Token::RParen) {
+                // allow a trailing comma, even though it's not in standard
+                break;
+            } else if !comma {
+                return self.expected(
+                    "',' or ')' after option definition",
+                    self.parser.peek_token(),
+                );
+            }
+        }
+        Ok(options)
+    }
+
+    /// parses (key value) style options into a map of String --> [`Value`].
+    ///
+    /// unlike [`Self::parse_string_options`], this method supports
+    /// keywords as key names as well as multiple value types such as
+    /// Numbers as well as Strings.

Review Comment:
   ```suggestion
       /// Parses (key value) style options into a map of String --> [`Value`].
       ///
       /// Unlike [`Self::parse_string_options`], this method supports
       /// keywords as key names as well as multiple value types such as
       /// Numbers as well as Strings.
   ```
   Perhaps this, just to be consistent with other documentation styling.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org