You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/02/05 05:51:20 UTC

[arrow-datafusion] branch master updated: use clap 3 style args parsing for datafusion cli (#1749)

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 40df55f  use clap 3 style args parsing for datafusion cli (#1749)
40df55f is described below

commit 40df55f7a6d9b816eae4ec073736327009eb8d4f
Author: Jiayu Liu <Ji...@users.noreply.github.com>
AuthorDate: Sat Feb 5 13:51:13 2022 +0800

    use clap 3 style args parsing for datafusion cli (#1749)
    
    * use clap 3 style args parsing for datafusion cli
    
    * upgrade cli version
---
 datafusion-cli/Cargo.toml          |   3 +-
 datafusion-cli/src/command.rs      |  11 ++-
 datafusion-cli/src/exec.rs         |  10 +--
 datafusion-cli/src/functions.rs    |   2 +-
 datafusion-cli/src/lib.rs          |   1 -
 datafusion-cli/src/main.rs         | 162 +++++++++++++++----------------------
 datafusion-cli/src/print_format.rs |  70 +---------------
 7 files changed, 80 insertions(+), 179 deletions(-)

diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index e8f1e30..06a7e87 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -17,7 +17,8 @@
 
 [package]
 name = "datafusion-cli"
-version = "5.1.0"
+description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model. It supports executing SQL queries against CSV and Parquet files as well as querying directly against in-memory data."
+version = "6.0.0"
 authors = ["Apache Arrow <de...@arrow.apache.org>"]
 edition = "2021"
 keywords = [ "arrow", "datafusion", "ballista", "query", "sql" ]
diff --git a/datafusion-cli/src/command.rs b/datafusion-cli/src/command.rs
index ef6f67d..0fd43a3 100644
--- a/datafusion-cli/src/command.rs
+++ b/datafusion-cli/src/command.rs
@@ -20,7 +20,8 @@
 use crate::context::Context;
 use crate::functions::{display_all_functions, Function};
 use crate::print_format::PrintFormat;
-use crate::print_options::{self, PrintOptions};
+use crate::print_options::PrintOptions;
+use clap::ArgEnum;
 use datafusion::arrow::array::{ArrayRef, StringArray};
 use datafusion::arrow::datatypes::{DataType, Field, Schema};
 use datafusion::arrow::record_batch::RecordBatch;
@@ -206,10 +207,14 @@ impl OutputFormat {
             Self::ChangeFormat(format) => {
                 if let Ok(format) = format.parse::<PrintFormat>() {
                     print_options.format = format;
-                    println!("Output format is {}.", print_options.format);
+                    println!("Output format is {:?}.", print_options.format);
                     Ok(())
                 } else {
-                    Err(DataFusionError::Execution(format!("{} is not a valid format type [possible values: csv, tsv, table, json, ndjson]", format)))
+                    Err(DataFusionError::Execution(format!(
+                        "{:?} is not a valid format type [possible values: {:?}]",
+                        format,
+                        PrintFormat::value_variants()
+                    )))
                 }
             }
         }
diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs
index dad6d6e..17b329b 100644
--- a/datafusion-cli/src/exec.rs
+++ b/datafusion-cli/src/exec.rs
@@ -21,20 +21,14 @@ use crate::{
     command::{Command, OutputFormat},
     context::Context,
     helper::CliHelper,
-    print_format::{all_print_formats, PrintFormat},
     print_options::PrintOptions,
 };
-use datafusion::arrow::record_batch::RecordBatch;
-use datafusion::arrow::util::pretty;
-use datafusion::error::{DataFusionError, Result};
-use rustyline::config::Config;
+use datafusion::error::Result;
 use rustyline::error::ReadlineError;
 use rustyline::Editor;
 use std::fs::File;
 use std::io::prelude::*;
 use std::io::BufReader;
-use std::str::FromStr;
-use std::sync::Arc;
 use std::time::Instant;
 
 /// run and execute SQL statements and commands from a file, against a context with the given print options
@@ -109,7 +103,7 @@ pub async fn exec_from_repl(ctx: &mut Context, print_options: &mut PrintOptions)
                                     );
                                 }
                             } else {
-                                println!("Output format is {}.", print_options.format);
+                                println!("Output format is {:?}.", print_options.format);
                             }
                         }
                         _ => {
diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs
index 2372e64..98b698a 100644
--- a/datafusion-cli/src/functions.rs
+++ b/datafusion-cli/src/functions.rs
@@ -20,7 +20,7 @@ use arrow::array::StringArray;
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow::record_batch::RecordBatch;
 use arrow::util::pretty::pretty_format_batches;
-use datafusion::error::{DataFusionError, Result};
+use datafusion::error::Result;
 use std::fmt;
 use std::str::FromStr;
 use std::sync::Arc;
diff --git a/datafusion-cli/src/lib.rs b/datafusion-cli/src/lib.rs
index b2bcdd3..b75be33 100644
--- a/datafusion-cli/src/lib.rs
+++ b/datafusion-cli/src/lib.rs
@@ -16,7 +16,6 @@
 // under the License.
 
 #![doc = include_str!("../README.md")]
-#![allow(unused_imports)]
 pub const DATAFUSION_CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
 
 pub mod command;
diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs
index 4cb9e9d..788bb27 100644
--- a/datafusion-cli/src/main.rs
+++ b/datafusion-cli/src/main.rs
@@ -15,14 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use clap::{crate_version, App, Arg};
+use clap::Parser;
 use datafusion::error::Result;
 use datafusion::execution::context::ExecutionConfig;
 use datafusion_cli::{
-    context::Context,
-    exec,
-    print_format::{all_print_formats, PrintFormat},
-    print_options::PrintOptions,
+    context::Context, exec, print_format::PrintFormat, print_options::PrintOptions,
     DATAFUSION_CLI_VERSION,
 };
 use std::env;
@@ -30,117 +27,84 @@ use std::fs::File;
 use std::io::BufReader;
 use std::path::Path;
 
+#[derive(Debug, Parser, PartialEq)]
+#[clap(author, version, about, long_about= None)]
+struct Args {
+    #[clap(
+        short = 'p',
+        long,
+        help = "Path to your data, default to current directory",
+        validator(is_valid_data_dir)
+    )]
+    data_path: Option<String>,
+
+    #[clap(
+        short = 'c',
+        long,
+        help = "The batch size of each query, or use DataFusion default",
+        validator(is_valid_batch_size)
+    )]
+    batch_size: Option<usize>,
+
+    #[clap(
+        short,
+        long,
+        multiple_values = true,
+        help = "Execute commands from file(s), then exit",
+        validator(is_valid_file)
+    )]
+    file: Vec<String>,
+
+    #[clap(long, arg_enum, default_value_t = PrintFormat::Table)]
+    format: PrintFormat,
+
+    #[clap(long, help = "Ballista scheduler host")]
+    host: Option<String>,
+
+    #[clap(long, help = "Ballista scheduler port")]
+    port: Option<u16>,
+
+    #[clap(
+        short,
+        long,
+        help = "Reduce printing other than the results and work quietly"
+    )]
+    quiet: bool,
+}
+
 #[tokio::main]
 pub async fn main() -> Result<()> {
-    let matches = App::new("DataFusion")
-        .version(crate_version!())
-        .about(
-            "DataFusion is an in-memory query engine that uses Apache Arrow \
-             as the memory model. It supports executing SQL queries against CSV and \
-             Parquet files as well as querying directly against in-memory data.",
-        )
-        .arg(
-            Arg::new("data-path")
-                .help("Path to your data, default to current directory")
-                .short('p')
-                .long("data-path")
-                .validator(is_valid_data_dir)
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("batch-size")
-                .help("The batch size of each query, or use DataFusion default")
-                .short('c')
-                .long("batch-size")
-                .validator(is_valid_batch_size)
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("file")
-                .help("Execute commands from file(s), then exit")
-                .short('f')
-                .long("file")
-                .multiple_occurrences(true)
-                .validator(is_valid_file)
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("format")
-                .help("Output format")
-                .long("format")
-                .default_value("table")
-                .possible_values(
-                    &all_print_formats()
-                        .iter()
-                        .map(|format| format.to_string())
-                        .collect::<Vec<_>>()
-                        .iter()
-                        .map(|i| i.as_str())
-                        .collect::<Vec<_>>(),
-                )
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("host")
-                .help("Ballista scheduler host")
-                .long("host")
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("port")
-                .help("Ballista scheduler port")
-                .long("port")
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("quiet")
-                .help("Reduce printing other than the results and work quietly")
-                .short('q')
-                .long("quiet")
-                .takes_value(false),
-        )
-        .get_matches();
-
-    let quiet = matches.is_present("quiet");
-
-    if !quiet {
-        println!("DataFusion CLI v{}\n", DATAFUSION_CLI_VERSION);
-    }
+    let args = Args::parse();
 
-    let host = matches.value_of("host");
-    let port = matches
-        .value_of("port")
-        .and_then(|port| port.parse::<u16>().ok());
+    if !args.quiet {
+        println!("DataFusion CLI v{}", DATAFUSION_CLI_VERSION);
+    }
 
-    if let Some(path) = matches.value_of("data-path") {
+    if let Some(ref path) = args.data_path {
         let p = Path::new(path);
         env::set_current_dir(&p).unwrap();
     };
 
     let mut execution_config = ExecutionConfig::new().with_information_schema(true);
 
-    if let Some(batch_size) = matches
-        .value_of("batch-size")
-        .and_then(|size| size.parse::<usize>().ok())
-    {
+    if let Some(batch_size) = args.batch_size {
         execution_config = execution_config.with_batch_size(batch_size);
     };
 
-    let mut ctx: Context = match (host, port) {
-        (Some(h), Some(p)) => Context::new_remote(h, p)?,
+    let mut ctx: Context = match (args.host, args.port) {
+        (Some(ref h), Some(p)) => Context::new_remote(h, p)?,
         _ => Context::new_local(&execution_config),
     };
 
-    let format = matches
-        .value_of("format")
-        .expect("No format is specified")
-        .parse::<PrintFormat>()
-        .expect("Invalid format");
-
-    let mut print_options = PrintOptions { format, quiet };
+    let mut print_options = PrintOptions {
+        format: args.format,
+        quiet: args.quiet,
+    };
 
-    if let Some(file_paths) = matches.values_of("file") {
-        let files = file_paths
+    let files = args.file;
+    if !files.is_empty() {
+        let files = files
+            .into_iter()
             .map(|file_path| File::open(file_path).unwrap())
             .collect::<Vec<_>>();
         for file in files {
diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs
index 0320166..05a1ef7 100644
--- a/datafusion-cli/src/print_format.rs
+++ b/datafusion-cli/src/print_format.rs
@@ -21,11 +21,10 @@ use arrow::json::{ArrayWriter, LineDelimitedWriter};
 use datafusion::arrow::record_batch::RecordBatch;
 use datafusion::arrow::util::pretty;
 use datafusion::error::{DataFusionError, Result};
-use std::fmt;
 use std::str::FromStr;
 
 /// Allow records to be printed in different formats
-#[derive(Debug, PartialEq, Eq, Clone)]
+#[derive(Debug, PartialEq, Eq, clap::ArgEnum, Clone)]
 pub enum PrintFormat {
     Csv,
     Tsv,
@@ -34,40 +33,11 @@ pub enum PrintFormat {
     NdJson,
 }
 
-/// returns all print formats
-pub fn all_print_formats() -> Vec<PrintFormat> {
-    vec![
-        PrintFormat::Csv,
-        PrintFormat::Tsv,
-        PrintFormat::Table,
-        PrintFormat::Json,
-        PrintFormat::NdJson,
-    ]
-}
-
 impl FromStr for PrintFormat {
-    type Err = ();
-    fn from_str(s: &str) -> std::result::Result<Self, ()> {
-        match s.to_lowercase().as_str() {
-            "csv" => Ok(Self::Csv),
-            "tsv" => Ok(Self::Tsv),
-            "table" => Ok(Self::Table),
-            "json" => Ok(Self::Json),
-            "ndjson" => Ok(Self::NdJson),
-            _ => Err(()),
-        }
-    }
-}
+    type Err = String;
 
-impl fmt::Display for PrintFormat {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            Self::Csv => write!(f, "csv"),
-            Self::Tsv => write!(f, "tsv"),
-            Self::Table => write!(f, "table"),
-            Self::Json => write!(f, "json"),
-            Self::NdJson => write!(f, "ndjson"),
-        }
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        clap::ArgEnum::from_str(s, true)
     }
 }
 
@@ -124,38 +94,6 @@ mod tests {
     use std::sync::Arc;
 
     #[test]
-    fn test_from_str() {
-        let format = "csv".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Csv, format);
-
-        let format = "tsv".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Tsv, format);
-
-        let format = "json".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Json, format);
-
-        let format = "ndjson".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::NdJson, format);
-
-        let format = "table".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Table, format);
-    }
-
-    #[test]
-    fn test_to_str() {
-        assert_eq!("csv", PrintFormat::Csv.to_string());
-        assert_eq!("table", PrintFormat::Table.to_string());
-        assert_eq!("tsv", PrintFormat::Tsv.to_string());
-        assert_eq!("json", PrintFormat::Json.to_string());
-        assert_eq!("ndjson", PrintFormat::NdJson.to_string());
-    }
-
-    #[test]
-    fn test_from_str_failure() {
-        assert!("pretty".parse::<PrintFormat>().is_err());
-    }
-
-    #[test]
     fn test_print_batches_with_sep() {
         let batches = vec![];
         assert_eq!("", print_batches_with_sep(&batches, b',').unwrap());