You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/05/21 18:30:17 UTC

[arrow-rs] branch master updated: feature gate csv functionality (#312)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new dde86b9  feature gate csv functionality (#312)
dde86b9 is described below

commit dde86b96206b94dde48f2fd0a39103120a18c4ff
Author: Ritchie Vink <ri...@gmail.com>
AuthorDate: Fri May 21 20:30:07 2021 +0200

    feature gate csv functionality (#312)
    
    * feature gate csv functionality
    
    * mock read_csv example
    
    * clippy
    
    * mock read_csv_infer_schema example
    
    * add tests of --no-default-features to CI
---
 .github/workflows/rust.yml              |  2 ++
 arrow/Cargo.toml                        |  5 +--
 arrow/benches/csv_writer.rs             | 64 +++++++++++++++++----------------
 arrow/examples/read_csv.rs              | 27 ++++++++------
 arrow/examples/read_csv_infer_schema.rs | 20 ++++++-----
 arrow/src/csv/reader.rs                 |  4 +--
 arrow/src/csv/writer.rs                 |  2 --
 arrow/src/error.rs                      |  2 +-
 arrow/src/lib.rs                        |  1 +
 arrow/src/util/string_writer.rs         |  3 ++
 10 files changed, 73 insertions(+), 57 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 53f9ac2..deb3d2f 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -112,6 +112,8 @@ jobs:
           cd arrow
           # re-run tests on arrow workspace with additional features
           cargo test --features=prettyprint
+          # run test on arrow with minimal set of features
+          cargo test --no-default-features
           cargo run --example builders
           cargo run --example dynamic_types
           cargo run --example read_csv
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 6d532ce..7781584 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -41,8 +41,8 @@ serde_derive = "1.0"
 serde_json = { version = "1.0", features = ["preserve_order"] }
 indexmap = "1.6"
 rand = "0.7"
-csv = "1.1"
 num = "0.4"
+csv_crate = { version = "1.1", optional = true, package="csv" }
 regex = "1.3"
 lazy_static = "1.4"
 packed_simd = { version = "0.3.4", optional = true, package = "packed_simd_2" }
@@ -54,8 +54,9 @@ lexical-core = "^0.7"
 multiversion = "0.6.1"
 
 [features]
-default = []
+default = ["csv"]
 avx512 = []
+csv = ["csv_crate"]
 simd = ["packed_simd"]
 prettyprint = ["prettytable-rs"]
 # this is only intended to be used in single-threaded programs: it verifies that
diff --git a/arrow/benches/csv_writer.rs b/arrow/benches/csv_writer.rs
index 9b01853..50b94d6 100644
--- a/arrow/benches/csv_writer.rs
+++ b/arrow/benches/csv_writer.rs
@@ -21,6 +21,7 @@ extern crate criterion;
 use criterion::*;
 
 use arrow::array::*;
+#[cfg(feature = "csv")]
 use arrow::csv;
 use arrow::datatypes::*;
 use arrow::record_batch::RecordBatch;
@@ -28,38 +29,41 @@ use std::fs::File;
 use std::sync::Arc;
 
 fn record_batches_to_csv() {
-    let schema = Schema::new(vec![
-        Field::new("c1", DataType::Utf8, false),
-        Field::new("c2", DataType::Float64, true),
-        Field::new("c3", DataType::UInt32, false),
-        Field::new("c3", DataType::Boolean, true),
-    ]);
+    #[cfg(feature = "csv")]
+    {
+        let schema = Schema::new(vec![
+            Field::new("c1", DataType::Utf8, false),
+            Field::new("c2", DataType::Float64, true),
+            Field::new("c3", DataType::UInt32, false),
+            Field::new("c3", DataType::Boolean, true),
+        ]);
 
-    let c1 = StringArray::from(vec![
-        "Lorem ipsum dolor sit amet",
-        "consectetur adipiscing elit",
-        "sed do eiusmod tempor",
-    ]);
-    let c2 = PrimitiveArray::<Float64Type>::from(vec![
-        Some(123.564532),
-        None,
-        Some(-556132.25),
-    ]);
-    let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
-    let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
+        let c1 = StringArray::from(vec![
+            "Lorem ipsum dolor sit amet",
+            "consectetur adipiscing elit",
+            "sed do eiusmod tempor",
+        ]);
+        let c2 = PrimitiveArray::<Float64Type>::from(vec![
+            Some(123.564532),
+            None,
+            Some(-556132.25),
+        ]);
+        let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
+        let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
 
-    let b = RecordBatch::try_new(
-        Arc::new(schema),
-        vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
-    )
-    .unwrap();
-    let file = File::create("target/bench_write_csv.csv").unwrap();
-    let mut writer = csv::Writer::new(file);
-    let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b];
-    #[allow(clippy::unit_arg)]
-    criterion::black_box(for batch in batches {
-        writer.write(batch).unwrap()
-    });
+        let b = RecordBatch::try_new(
+            Arc::new(schema),
+            vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
+        )
+        .unwrap();
+        let file = File::create("target/bench_write_csv.csv").unwrap();
+        let mut writer = csv::Writer::new(file);
+        let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b];
+        #[allow(clippy::unit_arg)]
+        criterion::black_box(for batch in batches {
+            writer.write(batch).unwrap()
+        });
+    }
 }
 
 fn criterion_benchmark(c: &mut Criterion) {
diff --git a/arrow/examples/read_csv.rs b/arrow/examples/read_csv.rs
index 9e2b9c3..506b898 100644
--- a/arrow/examples/read_csv.rs
+++ b/arrow/examples/read_csv.rs
@@ -20,24 +20,29 @@ extern crate arrow;
 use std::fs::File;
 use std::sync::Arc;
 
+#[cfg(feature = "csv")]
 use arrow::csv;
 use arrow::datatypes::{DataType, Field, Schema};
 #[cfg(feature = "prettyprint")]
 use arrow::util::pretty::print_batches;
 
 fn main() {
-    let schema = Schema::new(vec![
-        Field::new("city", DataType::Utf8, false),
-        Field::new("lat", DataType::Float64, false),
-        Field::new("lng", DataType::Float64, false),
-    ]);
+    #[cfg(feature = "csv")]
+    {
+        let schema = Schema::new(vec![
+            Field::new("city", DataType::Utf8, false),
+            Field::new("lat", DataType::Float64, false),
+            Field::new("lng", DataType::Float64, false),
+        ]);
 
-    let file = File::open("test/data/uk_cities.csv").unwrap();
+        let file = File::open("test/data/uk_cities.csv").unwrap();
 
-    let mut csv = csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None);
-    let _batch = csv.next().unwrap().unwrap();
-    #[cfg(feature = "prettyprint")]
-    {
-        print_batches(&[_batch]).unwrap();
+        let mut csv =
+            csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None);
+        let _batch = csv.next().unwrap().unwrap();
+        #[cfg(feature = "prettyprint")]
+        {
+            print_batches(&[_batch]).unwrap();
+        }
     }
 }
diff --git a/arrow/examples/read_csv_infer_schema.rs b/arrow/examples/read_csv_infer_schema.rs
index 93253e7..11f8cfb 100644
--- a/arrow/examples/read_csv_infer_schema.rs
+++ b/arrow/examples/read_csv_infer_schema.rs
@@ -17,20 +17,24 @@
 
 extern crate arrow;
 
+#[cfg(feature = "csv")]
 use arrow::csv;
 #[cfg(feature = "prettyprint")]
 use arrow::util::pretty::print_batches;
 use std::fs::File;
 
 fn main() {
-    let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
-    let builder = csv::ReaderBuilder::new()
-        .has_header(true)
-        .infer_schema(Some(100));
-    let mut csv = builder.build(file).unwrap();
-    let _batch = csv.next().unwrap().unwrap();
-    #[cfg(feature = "prettyprint")]
+    #[cfg(feature = "csv")]
     {
-        print_batches(&[_batch]).unwrap();
+        let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
+        let builder = csv::ReaderBuilder::new()
+            .has_header(true)
+            .infer_schema(Some(100));
+        let mut csv = builder.build(file).unwrap();
+        let _batch = csv.next().unwrap().unwrap();
+        #[cfg(feature = "prettyprint")]
+        {
+            print_batches(&[_batch]).unwrap();
+        }
     }
 }
diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs
index 00f1d7f..5b9fb5d 100644
--- a/arrow/src/csv/reader.rs
+++ b/arrow/src/csv/reader.rs
@@ -49,8 +49,6 @@ use std::fs::File;
 use std::io::{Read, Seek, SeekFrom};
 use std::sync::Arc;
 
-use csv as csv_crate;
-
 use crate::array::{
     ArrayRef, BooleanArray, DictionaryArray, PrimitiveArray, StringArray,
 };
@@ -58,7 +56,7 @@ use crate::datatypes::*;
 use crate::error::{ArrowError, Result};
 use crate::record_batch::RecordBatch;
 
-use self::csv_crate::{ByteRecord, StringRecord};
+use csv_crate::{ByteRecord, StringRecord};
 
 lazy_static! {
     static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap();
diff --git a/arrow/src/csv/writer.rs b/arrow/src/csv/writer.rs
index f2f4ce8..aa0ed67 100644
--- a/arrow/src/csv/writer.rs
+++ b/arrow/src/csv/writer.rs
@@ -65,8 +65,6 @@
 //! }
 //! ```
 
-use csv as csv_crate;
-
 use std::io::Write;
 
 use crate::datatypes::*;
diff --git a/arrow/src/error.rs b/arrow/src/error.rs
index 6bfa077..86896c0 100644
--- a/arrow/src/error.rs
+++ b/arrow/src/error.rs
@@ -19,7 +19,6 @@
 use std::fmt::{Debug, Display, Formatter};
 use std::io::Write;
 
-use csv as csv_crate;
 use std::error::Error;
 
 /// Many different operations in the `arrow` crate return this error type.
@@ -59,6 +58,7 @@ impl From<::std::io::Error> for ArrowError {
     }
 }
 
+#[cfg(feature = "csv")]
 impl From<csv_crate::Error> for ArrowError {
     fn from(error: csv_crate::Error) -> Self {
         match error.kind() {
diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs
index 30f968c9..687ce1e 100644
--- a/arrow/src/lib.rs
+++ b/arrow/src/lib.rs
@@ -149,6 +149,7 @@ pub mod bitmap;
 pub mod buffer;
 mod bytes;
 pub mod compute;
+#[cfg(feature = "csv")]
 pub mod csv;
 pub mod datatypes;
 pub mod error;
diff --git a/arrow/src/util/string_writer.rs b/arrow/src/util/string_writer.rs
index 2a8175d..4c61f18 100644
--- a/arrow/src/util/string_writer.rs
+++ b/arrow/src/util/string_writer.rs
@@ -23,6 +23,8 @@
 //! Example:
 //!
 //! ```
+//! #[cfg(feature = "csv")]
+//! {
 //! use arrow::array::*;
 //! use arrow::csv;
 //! use arrow::datatypes::*;
@@ -58,6 +60,7 @@
 //! let sw = StringWriter::new();
 //! let mut writer = csv::Writer::new(sw);
 //! writer.write(&batch).unwrap();
+//! }
 //! ```
 
 use std::io::{Error, ErrorKind, Result, Write};