You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ja...@apache.org on 2023/01/09 04:10:57 UTC
[arrow-datafusion] branch master updated: Minor: Move test data into `datafusion/core/tests/data` (#4855)
This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 664edea4e Minor: Move test data into `datafusion/core/tests/data` (#4855)
664edea4e is described below
commit 664edea4ec78114e8335a05a0e0dfa06a0d223b9
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Sun Jan 8 23:10:52 2023 -0500
Minor: Move test data into `datafusion/core/tests/data` (#4855)
* Minor: Move test data into tests/data
---
datafusion/core/src/dataframe.rs | 54 +++++++++++-----------
datafusion/core/src/execution/context.rs | 4 +-
datafusion/core/src/lib.rs | 4 +-
datafusion/core/src/physical_plan/mod.rs | 4 +-
datafusion/core/src/scheduler/mod.rs | 4 +-
.../core/tests/{ => data}/aggregate_simple.csv | 0
.../tests/{ => data}/aggregate_simple_pipe.csv | 0
.../core/tests/{ => data}/capitalized_example.csv | 0
datafusion/core/tests/{ => data}/customer.csv | 0
datafusion/core/tests/{ => data}/decimal_data.csv | 0
datafusion/core/tests/{ => data}/empty.csv | 0
datafusion/core/tests/{ => data}/example.csv | 0
datafusion/core/tests/{ => data}/null_cases.csv | 0
datafusion/core/tests/sql/create_drop.rs | 6 +--
datafusion/core/tests/sql/idenfifers.rs | 2 +-
datafusion/core/tests/sql/mod.rs | 6 +--
.../core/tests/sqllogictests/test_files/ddl.slt | 4 +-
datafusion/core/tests/user_defined_plan.rs | 7 +--
docs/source/user-guide/dataframe.md | 2 +-
docs/source/user-guide/example-usage.md | 12 ++---
20 files changed, 55 insertions(+), 54 deletions(-)
diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs
index f0542e149..fe417593a 100644
--- a/datafusion/core/src/dataframe.rs
+++ b/datafusion/core/src/dataframe.rs
@@ -63,7 +63,7 @@ use crate::prelude::SessionContext;
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
-/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+/// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df = df.filter(col("a").lt_eq(col("b")))?
/// .aggregate(vec![col("a")], vec![min(col("b"))])?
/// .limit(0, Some(100))?;
@@ -100,7 +100,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df = df.select_columns(&["a", "b"])?;
/// # Ok(())
/// # }
@@ -125,7 +125,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df = df.select(vec![col("a") * col("b"), col("c")])?;
/// # Ok(())
/// # }
@@ -150,7 +150,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df = df.filter(col("a").lt_eq(col("b")))?;
/// # Ok(())
/// # }
@@ -170,7 +170,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
///
/// // The following use is the equivalent of "SELECT MIN(b) GROUP BY a"
/// let _ = df.clone().aggregate(vec![col("a")], vec![min(col("b"))])?;
@@ -202,7 +202,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df = df.limit(0, Some(100))?;
/// # Ok(())
/// # }
@@ -223,7 +223,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let d2 = df.clone();
/// let df = df.union(d2)?;
/// # Ok(())
@@ -245,7 +245,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let d2 = df.clone();
/// let df = df.union_distinct(d2)?;
/// # Ok(())
@@ -268,7 +268,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df = df.distinct()?;
/// # Ok(())
/// # }
@@ -289,7 +289,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df = df.sort(vec![col("a").sort(true, true), col("b").sort(false, false)])?;
/// # Ok(())
/// # }
@@ -311,8 +311,8 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let left = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
- /// let right = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?
+ /// let left = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
+ /// let right = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?
/// .select(vec![
/// col("a").alias("a2"),
/// col("b").alias("b2"),
@@ -349,7 +349,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df1 = df.repartition(Partitioning::RoundRobinBatch(4))?;
/// # Ok(())
/// # }
@@ -370,7 +370,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let batches = df.collect().await?;
/// # Ok(())
/// # }
@@ -389,7 +389,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// df.show().await?;
/// # Ok(())
/// # }
@@ -407,7 +407,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// df.show_limit(10).await?;
/// # Ok(())
/// # }
@@ -429,7 +429,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let stream = df.execute_stream().await?;
/// # Ok(())
/// # }
@@ -449,7 +449,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let batches = df.collect_partitioned().await?;
/// # Ok(())
/// # }
@@ -468,7 +468,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let batches = df.execute_stream_partitioned().await?;
/// # Ok(())
/// # }
@@ -490,7 +490,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let schema = df.schema();
/// # Ok(())
/// # }
@@ -548,7 +548,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let batches = df.limit(0, Some(100))?.explain(false, false)?.collect().await?;
/// # Ok(())
/// # }
@@ -568,7 +568,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let f = df.registry();
/// // use f.udf("name", vec![...]) to use the udf
/// # Ok(())
@@ -586,7 +586,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let d2 = df.clone();
/// let df = df.intersect(d2)?;
/// # Ok(())
@@ -609,7 +609,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let d2 = df.clone();
/// let df = df.except(d2)?;
/// # Ok(())
@@ -655,7 +655,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df = df.with_column("ab_sum", col("a") + col("b"))?;
/// # Ok(())
/// # }
@@ -705,7 +705,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df = df.with_column_renamed("ab_sum", "total")?;
/// # Ok(())
/// # }
@@ -750,7 +750,7 @@ impl DataFrame {
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
- /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df = df.cache().await?;
/// # Ok(())
/// # }
diff --git a/datafusion/core/src/execution/context.rs b/datafusion/core/src/execution/context.rs
index 9d652f811..2687902a3 100644
--- a/datafusion/core/src/execution/context.rs
+++ b/datafusion/core/src/execution/context.rs
@@ -120,7 +120,7 @@ use super::options::{
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let ctx = SessionContext::new();
-/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+/// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
/// let df = df.filter(col("a").lt_eq(col("b")))?
/// .aggregate(vec![col("a")], vec![min(col("b"))])?
/// .limit(0, Some(100))?;
@@ -138,7 +138,7 @@ use super::options::{
/// # #[tokio::main]
/// # async fn main() -> Result<()> {
/// let mut ctx = SessionContext::new();
-/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?;
+/// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
/// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?;
/// # Ok(())
/// # }
diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs
index a1cbd653e..31b8c2862 100644
--- a/datafusion/core/src/lib.rs
+++ b/datafusion/core/src/lib.rs
@@ -37,7 +37,7 @@
//! let ctx = SessionContext::new();
//!
//! // create the dataframe
-//! let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+//! let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
//!
//! // create a plan
//! let df = df.filter(col("a").lt_eq(col("b")))?
@@ -75,7 +75,7 @@
//! # async fn main() -> Result<()> {
//! let ctx = SessionContext::new();
//!
-//! ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?;
+//! ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
//!
//! // create a plan
//! let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?;
diff --git a/datafusion/core/src/physical_plan/mod.rs b/datafusion/core/src/physical_plan/mod.rs
index a30b650ec..d7383f231 100644
--- a/datafusion/core/src/physical_plan/mod.rs
+++ b/datafusion/core/src/physical_plan/mod.rs
@@ -321,7 +321,7 @@ pub fn with_new_children_if_necessary(
/// let mut ctx = SessionContext::with_config(config);
///
/// // register the a table
-/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await.unwrap();
+/// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await.unwrap();
///
/// // create a plan to run a SQL query
/// let dataframe = ctx.sql("SELECT a FROM example WHERE a < 5").await.unwrap();
@@ -339,7 +339,7 @@ pub fn with_new_children_if_necessary(
/// \n CoalesceBatchesExec: target_batch_size=8192\
/// \n FilterExec: a@0 < 5\
/// \n RepartitionExec: partitioning=RoundRobinBatch(3)\
-/// \n CsvExec: files={1 group: [[WORKING_DIR/tests/example.csv]]}, has_header=true, limit=None, projection=[a]",
+/// \n CsvExec: files={1 group: [[WORKING_DIR/tests/data/example.csv]]}, has_header=true, limit=None, projection=[a]",
/// plan_string.trim());
///
/// let one_line = format!("{}", displayable_plan.one_line());
diff --git a/datafusion/core/src/scheduler/mod.rs b/datafusion/core/src/scheduler/mod.rs
index d552784fc..922fa1c3e 100644
--- a/datafusion/core/src/scheduler/mod.rs
+++ b/datafusion/core/src/scheduler/mod.rs
@@ -59,7 +59,7 @@
//! let config = SessionConfig::new().with_target_partitions(4);
//! let context = SessionContext::with_config(config);
//!
-//! context.register_csv("example", "../core/tests/example.csv", CsvReadOptions::new()).await.unwrap();
+//! context.register_csv("example", "../core/tests/data/example.csv", CsvReadOptions::new()).await.unwrap();
//! let plan = context.sql("SELECT MIN(b) FROM example")
//! .await
//! .unwrap()
@@ -345,7 +345,7 @@ mod tests {
"select id, b from (select id, b from table1 union all select id, b from table2 where a > 100 order by id) as t where b > 10 order by id, b",
"select id, MIN(b), MAX(b), AVG(b) from table1 group by id order by id",
"select count(*) from table1 where table1.a > 4",
- "WITH gp AS (SELECT id FROM table1 GROUP BY id)
+ "WITH gp AS (SELECT id FROM table1 GROUP BY id)
SELECT COUNT(CAST(CAST(gp.id || 'xx' AS TIMESTAMP) AS BIGINT)) FROM gp",
];
diff --git a/datafusion/core/tests/aggregate_simple.csv b/datafusion/core/tests/data/aggregate_simple.csv
similarity index 100%
rename from datafusion/core/tests/aggregate_simple.csv
rename to datafusion/core/tests/data/aggregate_simple.csv
diff --git a/datafusion/core/tests/aggregate_simple_pipe.csv b/datafusion/core/tests/data/aggregate_simple_pipe.csv
similarity index 100%
rename from datafusion/core/tests/aggregate_simple_pipe.csv
rename to datafusion/core/tests/data/aggregate_simple_pipe.csv
diff --git a/datafusion/core/tests/capitalized_example.csv b/datafusion/core/tests/data/capitalized_example.csv
similarity index 100%
rename from datafusion/core/tests/capitalized_example.csv
rename to datafusion/core/tests/data/capitalized_example.csv
diff --git a/datafusion/core/tests/customer.csv b/datafusion/core/tests/data/customer.csv
similarity index 100%
rename from datafusion/core/tests/customer.csv
rename to datafusion/core/tests/data/customer.csv
diff --git a/datafusion/core/tests/decimal_data.csv b/datafusion/core/tests/data/decimal_data.csv
similarity index 100%
rename from datafusion/core/tests/decimal_data.csv
rename to datafusion/core/tests/data/decimal_data.csv
diff --git a/datafusion/core/tests/empty.csv b/datafusion/core/tests/data/empty.csv
similarity index 100%
rename from datafusion/core/tests/empty.csv
rename to datafusion/core/tests/data/empty.csv
diff --git a/datafusion/core/tests/example.csv b/datafusion/core/tests/data/example.csv
similarity index 100%
rename from datafusion/core/tests/example.csv
rename to datafusion/core/tests/data/example.csv
diff --git a/datafusion/core/tests/null_cases.csv b/datafusion/core/tests/data/null_cases.csv
similarity index 100%
rename from datafusion/core/tests/null_cases.csv
rename to datafusion/core/tests/data/null_cases.csv
diff --git a/datafusion/core/tests/sql/create_drop.rs b/datafusion/core/tests/sql/create_drop.rs
index 1e3331db6..37e91bddc 100644
--- a/datafusion/core/tests/sql/create_drop.rs
+++ b/datafusion/core/tests/sql/create_drop.rs
@@ -42,13 +42,13 @@ async fn sql_create_table_if_not_exists() -> Result<()> {
assert_eq!(result, Vec::new());
// Create external table
- ctx.sql("CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION 'tests/aggregate_simple.csv'")
+ ctx.sql("CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION 'tests/data/aggregate_simple.csv'")
.await?
.collect()
.await?;
// Create external table
- let result = ctx.sql("CREATE EXTERNAL TABLE IF NOT EXISTS aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION 'tests/aggregate_simple.csv'")
+ let result = ctx.sql("CREATE EXTERNAL TABLE IF NOT EXISTS aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION 'tests/data/aggregate_simple.csv'")
.await?
.collect()
.await?;
@@ -129,7 +129,7 @@ async fn create_csv_table_empty_file() -> Result<()> {
let ctx =
SessionContext::with_config(SessionConfig::new().with_information_schema(true));
- let sql = "CREATE EXTERNAL TABLE empty STORED AS CSV WITH HEADER ROW LOCATION 'tests/empty.csv'";
+ let sql = "CREATE EXTERNAL TABLE empty STORED AS CSV WITH HEADER ROW LOCATION 'tests/data/empty.csv'";
ctx.sql(sql).await.unwrap();
let sql =
"select column_name, data_type, ordinal_position from information_schema.columns";
diff --git a/datafusion/core/tests/sql/idenfifers.rs b/datafusion/core/tests/sql/idenfifers.rs
index e2fde56e9..a305f23b4 100644
--- a/datafusion/core/tests/sql/idenfifers.rs
+++ b/datafusion/core/tests/sql/idenfifers.rs
@@ -31,7 +31,7 @@ async fn normalized_column_identifiers() {
// register csv file with the execution context
ctx.register_csv(
"case_insensitive_test",
- "tests/example.csv",
+ "tests/data/example.csv",
CsvReadOptions::new(),
)
.await
diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs
index 445950e1d..2575bfcdd 100644
--- a/datafusion/core/tests/sql/mod.rs
+++ b/datafusion/core/tests/sql/mod.rs
@@ -978,7 +978,7 @@ async fn register_aggregate_simple_csv(ctx: &SessionContext) -> Result<()> {
ctx.register_csv(
"aggregate_simple",
- "tests/aggregate_simple.csv",
+ "tests/data/aggregate_simple.csv",
CsvReadOptions::new().schema(&schema),
)
.await?;
@@ -995,7 +995,7 @@ async fn register_aggregate_null_cases_csv(ctx: &SessionContext) -> Result<()> {
ctx.register_csv(
"null_cases",
- "tests/null_cases.csv",
+ "tests/data/null_cases.csv",
CsvReadOptions::new().schema(&schema),
)
.await?;
@@ -1189,7 +1189,7 @@ async fn register_decimal_csv_table_by_sql(ctx: &SessionContext) {
)
STORED AS CSV
WITH HEADER ROW
- LOCATION 'tests/decimal_data.csv'",
+ LOCATION 'tests/data/decimal_data.csv'",
)
.await
.expect("Creating dataframe for CREATE EXTERNAL TABLE with decimal data type");
diff --git a/datafusion/core/tests/sqllogictests/test_files/ddl.slt b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
index 5cb4bd180..bdce635a4 100644
--- a/datafusion/core/tests/sqllogictests/test_files/ddl.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
@@ -203,7 +203,7 @@ DROP VIEW non_existent_view
##########
statement ok
-CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV WITH HEADER ROW LOCATION 'tests/aggregate_simple.csv';
+CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV WITH HEADER ROW LOCATION 'tests/data/aggregate_simple.csv';
# create_table_as
statement ok
@@ -403,7 +403,7 @@ DROP VIEW y;
# create_pipe_delimited_csv_table()
statement ok
-CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW DELIMITER '|' LOCATION 'tests/aggregate_simple_pipe.csv';
+CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW DELIMITER '|' LOCATION 'tests/data/aggregate_simple_pipe.csv';
query CCC
diff --git a/datafusion/core/tests/user_defined_plan.rs b/datafusion/core/tests/user_defined_plan.rs
index ff284eaaa..dcd182808 100644
--- a/datafusion/core/tests/user_defined_plan.rs
+++ b/datafusion/core/tests/user_defined_plan.rs
@@ -31,7 +31,7 @@
//!
//! ```sql
//! CREATE EXTERNAL TABLE sales(customer_id VARCHAR, revenue BIGINT)
-//! STORED AS CSV location 'tests/customer.csv';
+//! STORED AS CSV location 'tests/data/customer.csv';
//!
//! SELECT customer_id, revenue FROM sales ORDER BY revenue DESC limit 3;
//! ```
@@ -104,7 +104,7 @@ async fn exec_sql(ctx: &mut SessionContext, sql: &str) -> Result<String> {
/// Create a test table.
async fn setup_table(mut ctx: SessionContext) -> Result<SessionContext> {
- let sql = "CREATE EXTERNAL TABLE sales(customer_id VARCHAR, revenue BIGINT) STORED AS CSV location 'tests/customer.csv'";
+ let sql = "CREATE EXTERNAL TABLE sales(customer_id VARCHAR, revenue BIGINT) STORED AS CSV location 'tests/data/customer.csv'";
let expected = vec!["++", "++"];
@@ -116,7 +116,8 @@ async fn setup_table(mut ctx: SessionContext) -> Result<SessionContext> {
}
async fn setup_table_without_schemas(mut ctx: SessionContext) -> Result<SessionContext> {
- let sql = "CREATE EXTERNAL TABLE sales STORED AS CSV location 'tests/customer.csv'";
+ let sql =
+ "CREATE EXTERNAL TABLE sales STORED AS CSV location 'tests/data/customer.csv'";
let expected = vec!["++", "++"];
diff --git a/docs/source/user-guide/dataframe.md b/docs/source/user-guide/dataframe.md
index 77768dd8b..23766cd07 100644
--- a/docs/source/user-guide/dataframe.md
+++ b/docs/source/user-guide/dataframe.md
@@ -39,7 +39,7 @@ Here is a minimal example showing the execution of a query using the DataFrame A
```rust
let ctx = SessionContext::new();
-let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
let df = df.filter(col("a").lt_eq(col("b")))?
.aggregate(vec![col("a")], vec![min(col("b"))])?
.limit(0, Some(100))?;
diff --git a/docs/source/user-guide/example-usage.md b/docs/source/user-guide/example-usage.md
index 4606aa974..03283a408 100644
--- a/docs/source/user-guide/example-usage.md
+++ b/docs/source/user-guide/example-usage.md
@@ -19,7 +19,7 @@
# Example Usage
-In this example some simple processing is performed on the [`example.csv`](../../../datafusion/core/tests/example.csv) file.
+In this example some simple processing is performed on the [`example.csv`](../../../datafusion/core/tests/data/example.csv) file.
## Update `Cargo.toml`
@@ -39,7 +39,7 @@ use datafusion::prelude::*;
async fn main() -> datafusion::error::Result<()> {
// register the table
let ctx = SessionContext::new();
- ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?;
+ ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
// create a plan to run a SQL query
let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?;
@@ -59,7 +59,7 @@ use datafusion::prelude::*;
async fn main() -> datafusion::error::Result<()> {
// create the dataframe
let ctx = SessionContext::new();
- let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
let df = df.filter(col("a").lt_eq(col("b")))?
.aggregate(vec![col("a")], vec![min(col("b"))])?
@@ -85,7 +85,7 @@ async fn main() -> datafusion::error::Result<()> {
Please be aware that all identifiers are effectively made lower-case in SQL, so if your csv file has capital letters (ex: `Name`) you must put your column name in double quotes or the examples won't work.
-To illustrate this behavior, consider the [`capitalized_example.csv`](../../../datafusion/core/tests/capitalized_example.csv) file:
+To illustrate this behavior, consider the [`capitalized_example.csv`](../../../datafusion/core/tests/data/capitalized_example.csv) file:
## Run a SQL query against data stored in a CSV:
@@ -96,7 +96,7 @@ use datafusion::prelude::*;
async fn main() -> datafusion::error::Result<()> {
// register the table
let ctx = SessionContext::new();
- ctx.register_csv("example", "tests/capitalized_example.csv", CsvReadOptions::new()).await?;
+ ctx.register_csv("example", "tests/data/capitalized_example.csv", CsvReadOptions::new()).await?;
// create a plan to run a SQL query
let df = ctx.sql("SELECT \"A\", MIN(b) FROM example GROUP BY \"A\" LIMIT 100").await?;
@@ -116,7 +116,7 @@ use datafusion::prelude::*;
async fn main() -> datafusion::error::Result<()> {
// create the dataframe
let ctx = SessionContext::new();
- let df = ctx.read_csv("tests/capitalized_example.csv", CsvReadOptions::new()).await?;
+ let df = ctx.read_csv("tests/data/capitalized_example.csv", CsvReadOptions::new()).await?;
let df = df.filter(col("A").lt_eq(col("c")))?
.aggregate(vec![col("A")], vec![min(col("b"))])?