You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "tz70s (via GitHub)" <gi...@apache.org> on 2023/05/07 04:18:44 UTC

[GitHub] [arrow-datafusion] tz70s commented on a diff in pull request #6257: Enable parser to parse create external clauses in random order

tz70s commented on code in PR #6257:
URL: https://github.com/apache/arrow-datafusion/pull/6257#discussion_r1186779655


##########
datafusion/sql/src/parser.rs:
##########
@@ -379,59 +379,92 @@ impl<'a> DFParser<'a> {
                 .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
         let table_name = self.parser.parse_object_name()?;
         let (columns, _) = self.parse_columns()?;
-        self.parser
-            .expect_keywords(&[Keyword::STORED, Keyword::AS])?;
 
-        // THIS is the main difference: we parse a different file format.
-        let file_type = self.parse_file_format()?;
-
-        let has_header = self.parse_csv_has_header();
-
-        let has_delimiter = self.parse_has_delimiter();
-        let delimiter = match has_delimiter {
-            true => self.parse_delimiter()?,
-            false => ',',
-        };
-
-        let file_compression_type = if self.parse_has_file_compression_type() {
-            self.parse_file_compression_type()?
-        } else {
-            CompressionTypeVariant::UNCOMPRESSED
-        };
-
-        let table_partition_cols = if self.parse_has_partition() {
-            self.parse_partitions()?
-        } else {
-            vec![]
-        };
+        #[derive(Default)]
+        struct Builder {
+            file_type: Option<String>,
+            location: Option<String>,
+            has_header: Option<bool>,
+            delimiter: Option<char>,
+            file_compression_type: Option<CompressionTypeVariant>,
+            table_partition_cols: Option<Vec<String>>,
+            order_exprs: Option<Vec<OrderByExpr>>,
+            options: Option<HashMap<String, String>>,
+        }
+        let mut builder = Builder::default();
 
-        let order_exprs = if self.parse_has_order() {
-            self.parse_order_by_exprs()?
-        } else {
-            vec![]
-        };
+        fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), ParserError> {
+            if field.is_some() {
+                return Err(ParserError::ParserError(format!(
+                    "{name} specified more than once",
+                )));
+            }
+            Ok(())
+        }
 
-        let options = if self.parse_has_options() {
-            self.parse_options()?
-        } else {
-            HashMap::new()
-        };
+        loop {
+            if self.parser.parse_keyword(Keyword::STORED) {
+                self.parser.expect_keyword(Keyword::AS)?;
+                ensure_not_set(&builder.file_type, "STORED AS")?;
+                builder.file_type = Some(self.parse_file_format()?);
+            } else if self.parser.parse_keyword(Keyword::LOCATION) {
+                ensure_not_set(&builder.location, "LOCATION")?;
+                builder.location = Some(self.parser.parse_literal_string()?);
+            } else if self
+                .parser
+                .parse_keywords(&[Keyword::WITH, Keyword::HEADER])
+            {
+                self.parser.expect_keyword(Keyword::ROW)?;
+                ensure_not_set(&builder.has_header, "WITH HEADER ROW")?;
+                builder.has_header = Some(true);
+            } else if self.parser.parse_keywords(&[Keyword::WITH, Keyword::ORDER]) {
+                ensure_not_set(&builder.order_exprs, "WITH ORDER")?;
+                builder.order_exprs = Some(self.parse_order_by_exprs()?);
+            } else if self.parser.parse_keyword(Keyword::DELIMITER) {
+                ensure_not_set(&builder.delimiter, "DELIMITER")?;
+                builder.delimiter = Some(self.parse_delimiter()?);
+            } else if self.parser.parse_keyword(Keyword::COMPRESSION) {
+                self.parser.expect_keyword(Keyword::TYPE)?;
+                ensure_not_set(&builder.file_compression_type, "COMPRESSION TYPE")?;
+                builder.file_compression_type = Some(self.parse_file_compression_type()?);
+            } else if self.parser.parse_keyword(Keyword::PARTITIONED) {
+                self.parser.expect_keyword(Keyword::BY)?;
+                ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?;
+                builder.table_partition_cols = Some(self.parse_partitions()?)
+            } else if self.parser.parse_keyword(Keyword::OPTIONS) {
+                ensure_not_set(&builder.options, "OPTIONS")?;
+                builder.options = Some(self.parse_options()?);
+            } else {
+                break;

Review Comment:
   Not sure whether it would be worth adding more checks like `expect_one_of_keywords` (or some other, smarter approach) to make the user experience better? (though I couldn't find it universally applied to all parser paths)
   
   e.g. a typo like 
   
   ```sql
   CREATE EXTERNAL TABLE t(c1 int)
   STORED AS CSV
   WITH HEAD ROW
   LOCATION 'foo.csv';
   ```
   
   will turn into the error `sql parser error: Missing LOCATION clause in CREATE EXTERNAL TABLE statement`, which is a bit confusing.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org