You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/11/29 16:44:28 UTC

[arrow-datafusion] branch master updated: Adding more dataframe example to read csv files (#4360)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new fa4bea871 Adding more dataframe example to read csv files (#4360)
fa4bea871 is described below

commit fa4bea871086db70a8d19820a2f266de826836e1
Author: Data Psycho <mr...@gmail.com>
AuthorDate: Tue Nov 29 17:44:22 2022 +0100

    Adding more dataframe example to read csv files (#4360)
    
    * Adding more dataframe example to read csv files
    
    * Update typo in the example
    
    * Formatted changes with rustfmt
    
    * Update datafusion-examples/examples/dataframe.rs
    
    Co-authored-by: Martin Grigorov <ma...@users.noreply.github.com>
    
    * Update datafusion-examples/examples/dataframe.rs
    
    Co-authored-by: Martin Grigorov <ma...@users.noreply.github.com>
    
    * Completed the incomplete instruction, formatted the content as suggested
    
    * Update datafusion-examples/examples/dataframe.rs
    
    Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
    
    * Fix the type in default import for Csv Reader Option schema
    
    * csv file creation into a separate function and then call it into another function
    
    * Resolving clippy error for unnecessary let statement
    
    * Resolving clippy unused variable error
    
    Co-authored-by: ALAMSHC <AL...@c02dq0dsml7h.eu.novartis.net>
    Co-authored-by: Martin Grigorov <ma...@users.noreply.github.com>
    Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
 datafusion-examples/examples/dataframe.rs | 57 +++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs
index 5cdec9b88..a212387e2 100644
--- a/datafusion-examples/examples/dataframe.rs
+++ b/datafusion-examples/examples/dataframe.rs
@@ -15,8 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use datafusion::arrow::datatypes::{DataType, Field, Schema};
 use datafusion::error::Result;
 use datafusion::prelude::*;
+use std::fs;
+use std::sync::Arc;
 
 /// This example demonstrates executing a simple query against an Arrow data source (Parquet) and
 /// fetching results, using the DataFrame trait
@@ -39,5 +42,59 @@ async fn main() -> Result<()> {
     // print the results
     df.show().await?;
 
+    // Reading CSV file with inferred schema example
+    let csv_df = example_read_csv_file_with_inferred_schema().await;
+    csv_df.show().await?;
+
+    // Reading CSV file with defined schema
+    let csv_df = example_read_csv_file_with_schema().await;
+    csv_df.show().await?;
+
     Ok(())
 }
+
+// Function to create a test CSV file
+fn create_csv_file(path: String) {
+    // Create the data to put into the csv file with headers
+    let content = r#"id,time,vote,unixtime,rating
+a1,"10 6, 2013",3,1381017600,5.0
+a2,"08 9, 2013",2,1376006400,4.5"#;
+    // write the data
+    fs::write(path, content).expect("Problem with writing file!");
+}
+
+// Example to read data from a csv file with inferred schema
+async fn example_read_csv_file_with_inferred_schema() -> Arc<DataFrame> {
+    let path = "example.csv";
+    // Create a csv file using the predefined function
+    create_csv_file(path.to_string());
+    // Create a session context
+    let ctx = SessionContext::new();
+    // Create a lazy DataFrame by reading the CSV file through the context
+    ctx.read_csv(path, CsvReadOptions::default()).await.unwrap()
+}
+
+// Example to read data from a csv file with an explicitly defined schema
+async fn example_read_csv_file_with_schema() -> Arc<DataFrame> {
+    let path = "example.csv";
+    // Create a csv file using the predefined function
+    create_csv_file(path.to_string());
+    // Create a session context
+    let ctx = SessionContext::new();
+    // Define the schema
+    let schema = Schema::new(vec![
+        Field::new("id", DataType::Utf8, false),
+        Field::new("time", DataType::Utf8, false),
+        Field::new("vote", DataType::Int32, true),
+        Field::new("unixtime", DataType::Int64, false),
+        Field::new("rating", DataType::Float32, true),
+    ]);
+    // Create a csv option provider with the desired schema
+    let csv_read_option = CsvReadOptions {
+        // Update the option provider with the defined schema
+        schema: Some(&schema),
+        ..Default::default()
+    };
+    // Create a lazy DataFrame by reading the CSV file with the context and the options above
+    ctx.read_csv(path, csv_read_option).await.unwrap()
+}