You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/11/29 16:44:28 UTC
[arrow-datafusion] branch master updated: Adding more dataframe example to read csv files (#4360)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new fa4bea871 Adding more dataframe example to read csv files (#4360)
fa4bea871 is described below
commit fa4bea871086db70a8d19820a2f266de826836e1
Author: Data Psycho <mr...@gmail.com>
AuthorDate: Tue Nov 29 17:44:22 2022 +0100
Adding more dataframe example to read csv files (#4360)
* Adding more dataframe example to read csv files
* Update typo in the example
* Formatted changes with rustfmt
* Update datafusion-examples/examples/dataframe.rs
Co-authored-by: Martin Grigorov <ma...@users.noreply.github.com>
* Update datafusion-examples/examples/dataframe.rs
Co-authored-by: Martin Grigorov <ma...@users.noreply.github.com>
* Completed the incomplete instruction, formatted the content as suggested
* Update datafusion-examples/examples/dataframe.rs
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
* Fix the type in default import for Csv Reader Option schema
* csv file creation into a separate function and then call it into another function
* Resolving clippy error for unnecessary let statement
* Resolving clippy unused variable error
Co-authored-by: ALAMSHC <AL...@c02dq0dsml7h.eu.novartis.net>
Co-authored-by: Martin Grigorov <ma...@users.noreply.github.com>
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
datafusion-examples/examples/dataframe.rs | 57 +++++++++++++++++++++++++++++++
1 file changed, 57 insertions(+)
diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs
index 5cdec9b88..a212387e2 100644
--- a/datafusion-examples/examples/dataframe.rs
+++ b/datafusion-examples/examples/dataframe.rs
@@ -15,8 +15,11 @@
// specific language governing permissions and limitations
// under the License.
+use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::error::Result;
use datafusion::prelude::*;
+use std::fs;
+use std::sync::Arc;
/// This example demonstrates executing a simple query against an Arrow data source (Parquet) and
/// fetching results, using the DataFrame trait
@@ -39,5 +42,59 @@ async fn main() -> Result<()> {
// print the results
df.show().await?;
+ // Reading CSV file with inferred schema example
+ let csv_df = example_read_csv_file_with_inferred_schema().await;
+ csv_df.show().await?;
+
+ // Reading CSV file with defined schema
+ let csv_df = example_read_csv_file_with_schema().await;
+ csv_df.show().await?;
+
Ok(())
}
+
+// Writes a small test CSV file (header row plus two records) to `path`; panics if the write fails
+fn create_csv_file(path: String) {
+ // CSV text: a header line followed by two records; the quoted `time` values contain commas
+ let content = r#"id,time,vote,unixtime,rating
+a1,"10 6, 2013",3,1381017600,5.0
+a2,"08 9, 2013",2,1376006400,4.5"#;
+ // Write the content to `path`; `expect` panics with the message below on an I/O error
+ fs::write(path, content).expect("Problem with writing file!");
+}
+
+// Example: read a CSV file into a DataFrame using default read options (no explicit schema given)
+async fn example_read_csv_file_with_inferred_schema() -> Arc<DataFrame> {
+ let path = "example.csv";
+ // Write the sample CSV to `path` first so that there is a file to read
+ create_csv_file(path.to_string());
+ // Create a session context
+ let ctx = SessionContext::new();
+ // Build the DataFrame from the CSV with default options; `unwrap` panics if `read_csv` fails
+ ctx.read_csv(path, CsvReadOptions::default()).await.unwrap()
+}
+
+// Example: read a CSV file into a DataFrame using an explicitly defined schema
+async fn example_read_csv_file_with_schema() -> Arc<DataFrame> {
+ let path = "example.csv";
+ // Write the sample CSV to `path` first so that there is a file to read
+ create_csv_file(path.to_string());
+ // Create a session context
+ let ctx = SessionContext::new();
+ // Define the schema: one field per CSV column, given as (name, data type, nullable)
+ let schema = Schema::new(vec![
+ Field::new("id", DataType::Utf8, false),
+ Field::new("time", DataType::Utf8, false),
+ Field::new("vote", DataType::Int32, true),
+ Field::new("unixtime", DataType::Int64, false),
+ Field::new("rating", DataType::Float32, true),
+ ]);
+ // Build the CSV read options: explicit schema set, every other field left at its default
+ let csv_read_option = CsvReadOptions {
+ // Borrow the schema defined above; `schema` must outlive `csv_read_option`
+ schema: Some(&schema),
+ ..Default::default()
+ };
+ // Build the DataFrame from the CSV with these options; `unwrap` panics if `read_csv` fails
+ ctx.read_csv(path, csv_read_option).await.unwrap()
+}