You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/02/05 05:03:05 UTC

[arrow-datafusion] branch clap-3 updated (6948a0a -> 475115d)

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a change to branch clap-3
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git.


 discard 6948a0a  use clap 3 style args parsing for datafusion cli
     new 475115d  use clap 3 style args parsing for datafusion cli

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (6948a0a)
            \
             N -- N -- N   refs/heads/clap-3 (475115d)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 datafusion-cli/Cargo.toml     |  2 --
 datafusion-cli/src/command.rs |  1 -
 datafusion-cli/src/main.rs    | 14 --------------
 3 files changed, 17 deletions(-)

[arrow-datafusion] 01/01: use clap 3 style args parsing for datafusion cli

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch clap-3
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 475115d94ed1b2c9da8d12352abbb943bb4b2b56
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Sat Feb 5 00:06:30 2022 +0800

    use clap 3 style args parsing for datafusion cli
---
 datafusion-cli/Cargo.toml          |   1 +
 datafusion-cli/src/command.rs      |  11 ++-
 datafusion-cli/src/exec.rs         |  10 +--
 datafusion-cli/src/functions.rs    |   2 +-
 datafusion-cli/src/lib.rs          |   1 -
 datafusion-cli/src/main.rs         | 162 +++++++++++++++----------------------
 datafusion-cli/src/print_format.rs |  70 +---------------
 7 files changed, 79 insertions(+), 178 deletions(-)

diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index e8f1e30..af1d335 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -17,6 +17,7 @@
 
 [package]
 name = "datafusion-cli"
+description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model. It supports executing SQL queries against CSV and Parquet files as well as querying directly against in-memory data."
 version = "5.1.0"
 authors = ["Apache Arrow <de...@arrow.apache.org>"]
 edition = "2021"
diff --git a/datafusion-cli/src/command.rs b/datafusion-cli/src/command.rs
index ef6f67d..0fd43a3 100644
--- a/datafusion-cli/src/command.rs
+++ b/datafusion-cli/src/command.rs
@@ -20,7 +20,8 @@
 use crate::context::Context;
 use crate::functions::{display_all_functions, Function};
 use crate::print_format::PrintFormat;
-use crate::print_options::{self, PrintOptions};
+use crate::print_options::PrintOptions;
+use clap::ArgEnum;
 use datafusion::arrow::array::{ArrayRef, StringArray};
 use datafusion::arrow::datatypes::{DataType, Field, Schema};
 use datafusion::arrow::record_batch::RecordBatch;
@@ -206,10 +207,14 @@ impl OutputFormat {
             Self::ChangeFormat(format) => {
                 if let Ok(format) = format.parse::<PrintFormat>() {
                     print_options.format = format;
-                    println!("Output format is {}.", print_options.format);
+                    println!("Output format is {:?}.", print_options.format);
                     Ok(())
                 } else {
-                    Err(DataFusionError::Execution(format!("{} is not a valid format type [possible values: csv, tsv, table, json, ndjson]", format)))
+                    Err(DataFusionError::Execution(format!(
+                        "{:?} is not a valid format type [possible values: {:?}]",
+                        format,
+                        PrintFormat::value_variants()
+                    )))
                 }
             }
         }
diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs
index dad6d6e..17b329b 100644
--- a/datafusion-cli/src/exec.rs
+++ b/datafusion-cli/src/exec.rs
@@ -21,20 +21,14 @@ use crate::{
     command::{Command, OutputFormat},
     context::Context,
     helper::CliHelper,
-    print_format::{all_print_formats, PrintFormat},
     print_options::PrintOptions,
 };
-use datafusion::arrow::record_batch::RecordBatch;
-use datafusion::arrow::util::pretty;
-use datafusion::error::{DataFusionError, Result};
-use rustyline::config::Config;
+use datafusion::error::Result;
 use rustyline::error::ReadlineError;
 use rustyline::Editor;
 use std::fs::File;
 use std::io::prelude::*;
 use std::io::BufReader;
-use std::str::FromStr;
-use std::sync::Arc;
 use std::time::Instant;
 
 /// run and execute SQL statements and commands from a file, against a context with the given print options
@@ -109,7 +103,7 @@ pub async fn exec_from_repl(ctx: &mut Context, print_options: &mut PrintOptions)
                                     );
                                 }
                             } else {
-                                println!("Output format is {}.", print_options.format);
+                                println!("Output format is {:?}.", print_options.format);
                             }
                         }
                         _ => {
diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs
index 2372e64..98b698a 100644
--- a/datafusion-cli/src/functions.rs
+++ b/datafusion-cli/src/functions.rs
@@ -20,7 +20,7 @@ use arrow::array::StringArray;
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow::record_batch::RecordBatch;
 use arrow::util::pretty::pretty_format_batches;
-use datafusion::error::{DataFusionError, Result};
+use datafusion::error::Result;
 use std::fmt;
 use std::str::FromStr;
 use std::sync::Arc;
diff --git a/datafusion-cli/src/lib.rs b/datafusion-cli/src/lib.rs
index b2bcdd3..b75be33 100644
--- a/datafusion-cli/src/lib.rs
+++ b/datafusion-cli/src/lib.rs
@@ -16,7 +16,6 @@
 // under the License.
 
 #![doc = include_str!("../README.md")]
-#![allow(unused_imports)]
 pub const DATAFUSION_CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
 
 pub mod command;
diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs
index 4cb9e9d..788bb27 100644
--- a/datafusion-cli/src/main.rs
+++ b/datafusion-cli/src/main.rs
@@ -15,14 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use clap::{crate_version, App, Arg};
+use clap::Parser;
 use datafusion::error::Result;
 use datafusion::execution::context::ExecutionConfig;
 use datafusion_cli::{
-    context::Context,
-    exec,
-    print_format::{all_print_formats, PrintFormat},
-    print_options::PrintOptions,
+    context::Context, exec, print_format::PrintFormat, print_options::PrintOptions,
     DATAFUSION_CLI_VERSION,
 };
 use std::env;
@@ -30,117 +27,84 @@ use std::fs::File;
 use std::io::BufReader;
 use std::path::Path;
 
+#[derive(Debug, Parser, PartialEq)]
+#[clap(author, version, about, long_about= None)]
+struct Args {
+    #[clap(
+        short = 'p',
+        long,
+        help = "Path to your data, default to current directory",
+        validator(is_valid_data_dir)
+    )]
+    data_path: Option<String>,
+
+    #[clap(
+        short = 'c',
+        long,
+        help = "The batch size of each query, or use DataFusion default",
+        validator(is_valid_batch_size)
+    )]
+    batch_size: Option<usize>,
+
+    #[clap(
+        short,
+        long,
+        multiple_values = true,
+        help = "Execute commands from file(s), then exit",
+        validator(is_valid_file)
+    )]
+    file: Vec<String>,
+
+    #[clap(long, arg_enum, default_value_t = PrintFormat::Table)]
+    format: PrintFormat,
+
+    #[clap(long, help = "Ballista scheduler host")]
+    host: Option<String>,
+
+    #[clap(long, help = "Ballista scheduler port")]
+    port: Option<u16>,
+
+    #[clap(
+        short,
+        long,
+        help = "Reduce printing other than the results and work quietly"
+    )]
+    quiet: bool,
+}
+
 #[tokio::main]
 pub async fn main() -> Result<()> {
-    let matches = App::new("DataFusion")
-        .version(crate_version!())
-        .about(
-            "DataFusion is an in-memory query engine that uses Apache Arrow \
-             as the memory model. It supports executing SQL queries against CSV and \
-             Parquet files as well as querying directly against in-memory data.",
-        )
-        .arg(
-            Arg::new("data-path")
-                .help("Path to your data, default to current directory")
-                .short('p')
-                .long("data-path")
-                .validator(is_valid_data_dir)
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("batch-size")
-                .help("The batch size of each query, or use DataFusion default")
-                .short('c')
-                .long("batch-size")
-                .validator(is_valid_batch_size)
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("file")
-                .help("Execute commands from file(s), then exit")
-                .short('f')
-                .long("file")
-                .multiple_occurrences(true)
-                .validator(is_valid_file)
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("format")
-                .help("Output format")
-                .long("format")
-                .default_value("table")
-                .possible_values(
-                    &all_print_formats()
-                        .iter()
-                        .map(|format| format.to_string())
-                        .collect::<Vec<_>>()
-                        .iter()
-                        .map(|i| i.as_str())
-                        .collect::<Vec<_>>(),
-                )
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("host")
-                .help("Ballista scheduler host")
-                .long("host")
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("port")
-                .help("Ballista scheduler port")
-                .long("port")
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("quiet")
-                .help("Reduce printing other than the results and work quietly")
-                .short('q')
-                .long("quiet")
-                .takes_value(false),
-        )
-        .get_matches();
-
-    let quiet = matches.is_present("quiet");
-
-    if !quiet {
-        println!("DataFusion CLI v{}\n", DATAFUSION_CLI_VERSION);
-    }
+    let args = Args::parse();
 
-    let host = matches.value_of("host");
-    let port = matches
-        .value_of("port")
-        .and_then(|port| port.parse::<u16>().ok());
+    if !args.quiet {
+        println!("DataFusion CLI v{}", DATAFUSION_CLI_VERSION);
+    }
 
-    if let Some(path) = matches.value_of("data-path") {
+    if let Some(ref path) = args.data_path {
         let p = Path::new(path);
         env::set_current_dir(&p).unwrap();
     };
 
     let mut execution_config = ExecutionConfig::new().with_information_schema(true);
 
-    if let Some(batch_size) = matches
-        .value_of("batch-size")
-        .and_then(|size| size.parse::<usize>().ok())
-    {
+    if let Some(batch_size) = args.batch_size {
         execution_config = execution_config.with_batch_size(batch_size);
     };
 
-    let mut ctx: Context = match (host, port) {
-        (Some(h), Some(p)) => Context::new_remote(h, p)?,
+    let mut ctx: Context = match (args.host, args.port) {
+        (Some(ref h), Some(p)) => Context::new_remote(h, p)?,
         _ => Context::new_local(&execution_config),
     };
 
-    let format = matches
-        .value_of("format")
-        .expect("No format is specified")
-        .parse::<PrintFormat>()
-        .expect("Invalid format");
-
-    let mut print_options = PrintOptions { format, quiet };
+    let mut print_options = PrintOptions {
+        format: args.format,
+        quiet: args.quiet,
+    };
 
-    if let Some(file_paths) = matches.values_of("file") {
-        let files = file_paths
+    let files = args.file;
+    if !files.is_empty() {
+        let files = files
+            .into_iter()
             .map(|file_path| File::open(file_path).unwrap())
             .collect::<Vec<_>>();
         for file in files {
diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs
index 0320166..05a1ef7 100644
--- a/datafusion-cli/src/print_format.rs
+++ b/datafusion-cli/src/print_format.rs
@@ -21,11 +21,10 @@ use arrow::json::{ArrayWriter, LineDelimitedWriter};
 use datafusion::arrow::record_batch::RecordBatch;
 use datafusion::arrow::util::pretty;
 use datafusion::error::{DataFusionError, Result};
-use std::fmt;
 use std::str::FromStr;
 
 /// Allow records to be printed in different formats
-#[derive(Debug, PartialEq, Eq, Clone)]
+#[derive(Debug, PartialEq, Eq, clap::ArgEnum, Clone)]
 pub enum PrintFormat {
     Csv,
     Tsv,
@@ -34,40 +33,11 @@ pub enum PrintFormat {
     NdJson,
 }
 
-/// returns all print formats
-pub fn all_print_formats() -> Vec<PrintFormat> {
-    vec![
-        PrintFormat::Csv,
-        PrintFormat::Tsv,
-        PrintFormat::Table,
-        PrintFormat::Json,
-        PrintFormat::NdJson,
-    ]
-}
-
 impl FromStr for PrintFormat {
-    type Err = ();
-    fn from_str(s: &str) -> std::result::Result<Self, ()> {
-        match s.to_lowercase().as_str() {
-            "csv" => Ok(Self::Csv),
-            "tsv" => Ok(Self::Tsv),
-            "table" => Ok(Self::Table),
-            "json" => Ok(Self::Json),
-            "ndjson" => Ok(Self::NdJson),
-            _ => Err(()),
-        }
-    }
-}
+    type Err = String;
 
-impl fmt::Display for PrintFormat {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            Self::Csv => write!(f, "csv"),
-            Self::Tsv => write!(f, "tsv"),
-            Self::Table => write!(f, "table"),
-            Self::Json => write!(f, "json"),
-            Self::NdJson => write!(f, "ndjson"),
-        }
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        clap::ArgEnum::from_str(s, true)
     }
 }
 
@@ -124,38 +94,6 @@ mod tests {
     use std::sync::Arc;
 
     #[test]
-    fn test_from_str() {
-        let format = "csv".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Csv, format);
-
-        let format = "tsv".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Tsv, format);
-
-        let format = "json".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Json, format);
-
-        let format = "ndjson".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::NdJson, format);
-
-        let format = "table".parse::<PrintFormat>().unwrap();
-        assert_eq!(PrintFormat::Table, format);
-    }
-
-    #[test]
-    fn test_to_str() {
-        assert_eq!("csv", PrintFormat::Csv.to_string());
-        assert_eq!("table", PrintFormat::Table.to_string());
-        assert_eq!("tsv", PrintFormat::Tsv.to_string());
-        assert_eq!("json", PrintFormat::Json.to_string());
-        assert_eq!("ndjson", PrintFormat::NdJson.to_string());
-    }
-
-    #[test]
-    fn test_from_str_failure() {
-        assert!("pretty".parse::<PrintFormat>().is_err());
-    }
-
-    #[test]
     fn test_print_batches_with_sep() {
         let batches = vec![];
         assert_eq!("", print_batches_with_sep(&batches, b',').unwrap());