You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ja...@apache.org on 2023/01/09 04:10:57 UTC

[arrow-datafusion] branch master updated: Minor: Move test data into `datafusion/core/tests/data` (#4855)

This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 664edea4e Minor: Move test data into `datafusion/core/tests/data` (#4855)
664edea4e is described below

commit 664edea4ec78114e8335a05a0e0dfa06a0d223b9
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Sun Jan 8 23:10:52 2023 -0500

    Minor: Move test data into `datafusion/core/tests/data` (#4855)
    
    * Minor: Move test data into tests/data
---
 datafusion/core/src/dataframe.rs                   | 54 +++++++++++-----------
 datafusion/core/src/execution/context.rs           |  4 +-
 datafusion/core/src/lib.rs                         |  4 +-
 datafusion/core/src/physical_plan/mod.rs           |  4 +-
 datafusion/core/src/scheduler/mod.rs               |  4 +-
 .../core/tests/{ => data}/aggregate_simple.csv     |  0
 .../tests/{ => data}/aggregate_simple_pipe.csv     |  0
 .../core/tests/{ => data}/capitalized_example.csv  |  0
 datafusion/core/tests/{ => data}/customer.csv      |  0
 datafusion/core/tests/{ => data}/decimal_data.csv  |  0
 datafusion/core/tests/{ => data}/empty.csv         |  0
 datafusion/core/tests/{ => data}/example.csv       |  0
 datafusion/core/tests/{ => data}/null_cases.csv    |  0
 datafusion/core/tests/sql/create_drop.rs           |  6 +--
 datafusion/core/tests/sql/idenfifers.rs            |  2 +-
 datafusion/core/tests/sql/mod.rs                   |  6 +--
 .../core/tests/sqllogictests/test_files/ddl.slt    |  4 +-
 datafusion/core/tests/user_defined_plan.rs         |  7 +--
 docs/source/user-guide/dataframe.md                |  2 +-
 docs/source/user-guide/example-usage.md            | 12 ++---
 20 files changed, 55 insertions(+), 54 deletions(-)

diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs
index f0542e149..fe417593a 100644
--- a/datafusion/core/src/dataframe.rs
+++ b/datafusion/core/src/dataframe.rs
@@ -63,7 +63,7 @@ use crate::prelude::SessionContext;
 /// # #[tokio::main]
 /// # async fn main() -> Result<()> {
 /// let ctx = SessionContext::new();
-/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+/// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
 /// let df = df.filter(col("a").lt_eq(col("b")))?
 ///            .aggregate(vec![col("a")], vec![min(col("b"))])?
 ///            .limit(0, Some(100))?;
@@ -100,7 +100,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let df = df.select_columns(&["a", "b"])?;
     /// # Ok(())
     /// # }
@@ -125,7 +125,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let df = df.select(vec![col("a") * col("b"), col("c")])?;
     /// # Ok(())
     /// # }
@@ -150,7 +150,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let df = df.filter(col("a").lt_eq(col("b")))?;
     /// # Ok(())
     /// # }
@@ -170,7 +170,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     ///
     /// // The following use is the equivalent of "SELECT MIN(b) GROUP BY a"
     /// let _ = df.clone().aggregate(vec![col("a")], vec![min(col("b"))])?;
@@ -202,7 +202,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let df = df.limit(0, Some(100))?;
     /// # Ok(())
     /// # }
@@ -223,7 +223,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let d2 = df.clone();
     /// let df = df.union(d2)?;
     /// # Ok(())
@@ -245,7 +245,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let d2 = df.clone();
     /// let df = df.union_distinct(d2)?;
     /// # Ok(())
@@ -268,7 +268,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let df = df.distinct()?;
     /// # Ok(())
     /// # }
@@ -289,7 +289,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let df = df.sort(vec![col("a").sort(true, true), col("b").sort(false, false)])?;
     /// # Ok(())
     /// # }
@@ -311,8 +311,8 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let left = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
-    /// let right = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?
+    /// let left = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
+    /// let right = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?
     ///   .select(vec![
     ///     col("a").alias("a2"),
     ///     col("b").alias("b2"),
@@ -349,7 +349,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let df1 = df.repartition(Partitioning::RoundRobinBatch(4))?;
     /// # Ok(())
     /// # }
@@ -370,7 +370,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let batches = df.collect().await?;
     /// # Ok(())
     /// # }
@@ -389,7 +389,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// df.show().await?;
     /// # Ok(())
     /// # }
@@ -407,7 +407,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// df.show_limit(10).await?;
     /// # Ok(())
     /// # }
@@ -429,7 +429,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let stream = df.execute_stream().await?;
     /// # Ok(())
     /// # }
@@ -449,7 +449,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let batches = df.collect_partitioned().await?;
     /// # Ok(())
     /// # }
@@ -468,7 +468,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let batches = df.execute_stream_partitioned().await?;
     /// # Ok(())
     /// # }
@@ -490,7 +490,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let schema = df.schema();
     /// # Ok(())
     /// # }
@@ -548,7 +548,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let batches = df.limit(0, Some(100))?.explain(false, false)?.collect().await?;
     /// # Ok(())
     /// # }
@@ -568,7 +568,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let f = df.registry();
     /// // use f.udf("name", vec![...]) to use the udf
     /// # Ok(())
@@ -586,7 +586,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let d2 = df.clone();
     /// let df = df.intersect(d2)?;
     /// # Ok(())
@@ -609,7 +609,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let d2 = df.clone();
     /// let df = df.except(d2)?;
     /// # Ok(())
@@ -655,7 +655,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let df = df.with_column("ab_sum", col("a") + col("b"))?;
     /// # Ok(())
     /// # }
@@ -705,7 +705,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let df = df.with_column_renamed("ab_sum", "total")?;
     /// # Ok(())
     /// # }
@@ -750,7 +750,7 @@ impl DataFrame {
     /// # #[tokio::main]
     /// # async fn main() -> Result<()> {
     /// let ctx = SessionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     /// let df = df.cache().await?;
     /// # Ok(())
     /// # }
diff --git a/datafusion/core/src/execution/context.rs b/datafusion/core/src/execution/context.rs
index 9d652f811..2687902a3 100644
--- a/datafusion/core/src/execution/context.rs
+++ b/datafusion/core/src/execution/context.rs
@@ -120,7 +120,7 @@ use super::options::{
 /// # #[tokio::main]
 /// # async fn main() -> Result<()> {
 /// let ctx = SessionContext::new();
-/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+/// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
 /// let df = df.filter(col("a").lt_eq(col("b")))?
 ///            .aggregate(vec![col("a")], vec![min(col("b"))])?
 ///            .limit(0, Some(100))?;
@@ -138,7 +138,7 @@ use super::options::{
 /// # #[tokio::main]
 /// # async fn main() -> Result<()> {
 /// let mut ctx = SessionContext::new();
-/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?;
+/// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
 /// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?;
 /// # Ok(())
 /// # }
diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs
index a1cbd653e..31b8c2862 100644
--- a/datafusion/core/src/lib.rs
+++ b/datafusion/core/src/lib.rs
@@ -37,7 +37,7 @@
 //! let ctx = SessionContext::new();
 //!
 //! // create the dataframe
-//! let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+//! let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
 //!
 //! // create a plan
 //! let df = df.filter(col("a").lt_eq(col("b")))?
@@ -75,7 +75,7 @@
 //! # async fn main() -> Result<()> {
 //! let ctx = SessionContext::new();
 //!
-//! ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?;
+//! ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
 //!
 //! // create a plan
 //! let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?;
diff --git a/datafusion/core/src/physical_plan/mod.rs b/datafusion/core/src/physical_plan/mod.rs
index a30b650ec..d7383f231 100644
--- a/datafusion/core/src/physical_plan/mod.rs
+++ b/datafusion/core/src/physical_plan/mod.rs
@@ -321,7 +321,7 @@ pub fn with_new_children_if_necessary(
 ///   let mut ctx = SessionContext::with_config(config);
 ///
 ///   // register the a table
-///   ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await.unwrap();
+///   ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await.unwrap();
 ///
 ///   // create a plan to run a SQL query
 ///   let dataframe = ctx.sql("SELECT a FROM example WHERE a < 5").await.unwrap();
@@ -339,7 +339,7 @@ pub fn with_new_children_if_necessary(
 ///              \n  CoalesceBatchesExec: target_batch_size=8192\
 ///              \n    FilterExec: a@0 < 5\
 ///              \n      RepartitionExec: partitioning=RoundRobinBatch(3)\
-///              \n        CsvExec: files={1 group: [[WORKING_DIR/tests/example.csv]]}, has_header=true, limit=None, projection=[a]",
+///              \n        CsvExec: files={1 group: [[WORKING_DIR/tests/data/example.csv]]}, has_header=true, limit=None, projection=[a]",
 ///               plan_string.trim());
 ///
 ///   let one_line = format!("{}", displayable_plan.one_line());
diff --git a/datafusion/core/src/scheduler/mod.rs b/datafusion/core/src/scheduler/mod.rs
index d552784fc..922fa1c3e 100644
--- a/datafusion/core/src/scheduler/mod.rs
+++ b/datafusion/core/src/scheduler/mod.rs
@@ -59,7 +59,7 @@
 //! let config = SessionConfig::new().with_target_partitions(4);
 //! let context = SessionContext::with_config(config);
 //!
-//! context.register_csv("example", "../core/tests/example.csv", CsvReadOptions::new()).await.unwrap();
+//! context.register_csv("example", "../core/tests/data/example.csv", CsvReadOptions::new()).await.unwrap();
 //! let plan = context.sql("SELECT MIN(b) FROM example")
 //!     .await
 //!    .unwrap()
@@ -345,7 +345,7 @@ mod tests {
             "select id, b from (select id, b from table1 union all select id, b from table2 where a > 100 order by id) as t where b > 10 order by id, b",
             "select id, MIN(b), MAX(b), AVG(b) from table1 group by id order by id",
             "select count(*) from table1 where table1.a > 4",
-            "WITH gp AS (SELECT id FROM table1 GROUP BY id) 
+            "WITH gp AS (SELECT id FROM table1 GROUP BY id)
             SELECT COUNT(CAST(CAST(gp.id || 'xx' AS TIMESTAMP) AS BIGINT)) FROM gp",
         ];
 
diff --git a/datafusion/core/tests/aggregate_simple.csv b/datafusion/core/tests/data/aggregate_simple.csv
similarity index 100%
rename from datafusion/core/tests/aggregate_simple.csv
rename to datafusion/core/tests/data/aggregate_simple.csv
diff --git a/datafusion/core/tests/aggregate_simple_pipe.csv b/datafusion/core/tests/data/aggregate_simple_pipe.csv
similarity index 100%
rename from datafusion/core/tests/aggregate_simple_pipe.csv
rename to datafusion/core/tests/data/aggregate_simple_pipe.csv
diff --git a/datafusion/core/tests/capitalized_example.csv b/datafusion/core/tests/data/capitalized_example.csv
similarity index 100%
rename from datafusion/core/tests/capitalized_example.csv
rename to datafusion/core/tests/data/capitalized_example.csv
diff --git a/datafusion/core/tests/customer.csv b/datafusion/core/tests/data/customer.csv
similarity index 100%
rename from datafusion/core/tests/customer.csv
rename to datafusion/core/tests/data/customer.csv
diff --git a/datafusion/core/tests/decimal_data.csv b/datafusion/core/tests/data/decimal_data.csv
similarity index 100%
rename from datafusion/core/tests/decimal_data.csv
rename to datafusion/core/tests/data/decimal_data.csv
diff --git a/datafusion/core/tests/empty.csv b/datafusion/core/tests/data/empty.csv
similarity index 100%
rename from datafusion/core/tests/empty.csv
rename to datafusion/core/tests/data/empty.csv
diff --git a/datafusion/core/tests/example.csv b/datafusion/core/tests/data/example.csv
similarity index 100%
rename from datafusion/core/tests/example.csv
rename to datafusion/core/tests/data/example.csv
diff --git a/datafusion/core/tests/null_cases.csv b/datafusion/core/tests/data/null_cases.csv
similarity index 100%
rename from datafusion/core/tests/null_cases.csv
rename to datafusion/core/tests/data/null_cases.csv
diff --git a/datafusion/core/tests/sql/create_drop.rs b/datafusion/core/tests/sql/create_drop.rs
index 1e3331db6..37e91bddc 100644
--- a/datafusion/core/tests/sql/create_drop.rs
+++ b/datafusion/core/tests/sql/create_drop.rs
@@ -42,13 +42,13 @@ async fn sql_create_table_if_not_exists() -> Result<()> {
     assert_eq!(result, Vec::new());
 
     // Create external table
-    ctx.sql("CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION 'tests/aggregate_simple.csv'")
+    ctx.sql("CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION 'tests/data/aggregate_simple.csv'")
         .await?
         .collect()
         .await?;
 
     // Create external table
-    let result = ctx.sql("CREATE EXTERNAL TABLE IF NOT EXISTS aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION 'tests/aggregate_simple.csv'")
+    let result = ctx.sql("CREATE EXTERNAL TABLE IF NOT EXISTS aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION 'tests/data/aggregate_simple.csv'")
         .await?
         .collect()
         .await?;
@@ -129,7 +129,7 @@ async fn create_csv_table_empty_file() -> Result<()> {
     let ctx =
         SessionContext::with_config(SessionConfig::new().with_information_schema(true));
 
-    let sql = "CREATE EXTERNAL TABLE empty STORED AS CSV WITH HEADER ROW LOCATION 'tests/empty.csv'";
+    let sql = "CREATE EXTERNAL TABLE empty STORED AS CSV WITH HEADER ROW LOCATION 'tests/data/empty.csv'";
     ctx.sql(sql).await.unwrap();
     let sql =
         "select column_name, data_type, ordinal_position from information_schema.columns";
diff --git a/datafusion/core/tests/sql/idenfifers.rs b/datafusion/core/tests/sql/idenfifers.rs
index e2fde56e9..a305f23b4 100644
--- a/datafusion/core/tests/sql/idenfifers.rs
+++ b/datafusion/core/tests/sql/idenfifers.rs
@@ -31,7 +31,7 @@ async fn normalized_column_identifiers() {
     // register csv file with the execution context
     ctx.register_csv(
         "case_insensitive_test",
-        "tests/example.csv",
+        "tests/data/example.csv",
         CsvReadOptions::new(),
     )
     .await
diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs
index 445950e1d..2575bfcdd 100644
--- a/datafusion/core/tests/sql/mod.rs
+++ b/datafusion/core/tests/sql/mod.rs
@@ -978,7 +978,7 @@ async fn register_aggregate_simple_csv(ctx: &SessionContext) -> Result<()> {
 
     ctx.register_csv(
         "aggregate_simple",
-        "tests/aggregate_simple.csv",
+        "tests/data/aggregate_simple.csv",
         CsvReadOptions::new().schema(&schema),
     )
     .await?;
@@ -995,7 +995,7 @@ async fn register_aggregate_null_cases_csv(ctx: &SessionContext) -> Result<()> {
 
     ctx.register_csv(
         "null_cases",
-        "tests/null_cases.csv",
+        "tests/data/null_cases.csv",
         CsvReadOptions::new().schema(&schema),
     )
     .await?;
@@ -1189,7 +1189,7 @@ async fn register_decimal_csv_table_by_sql(ctx: &SessionContext) {
             )
             STORED AS CSV
             WITH HEADER ROW
-            LOCATION 'tests/decimal_data.csv'",
+            LOCATION 'tests/data/decimal_data.csv'",
         )
         .await
         .expect("Creating dataframe for CREATE EXTERNAL TABLE with decimal data type");
diff --git a/datafusion/core/tests/sqllogictests/test_files/ddl.slt b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
index 5cb4bd180..bdce635a4 100644
--- a/datafusion/core/tests/sqllogictests/test_files/ddl.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
@@ -203,7 +203,7 @@ DROP VIEW non_existent_view
 ##########
 
 statement ok
-CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV WITH HEADER ROW LOCATION 'tests/aggregate_simple.csv';
+CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV WITH HEADER ROW LOCATION 'tests/data/aggregate_simple.csv';
 
 # create_table_as
 statement ok
@@ -403,7 +403,7 @@ DROP VIEW y;
 # create_pipe_delimited_csv_table()
 
 statement ok
-CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW DELIMITER '|' LOCATION 'tests/aggregate_simple_pipe.csv';
+CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW DELIMITER '|' LOCATION 'tests/data/aggregate_simple_pipe.csv';
 
 
 query CCC
diff --git a/datafusion/core/tests/user_defined_plan.rs b/datafusion/core/tests/user_defined_plan.rs
index ff284eaaa..dcd182808 100644
--- a/datafusion/core/tests/user_defined_plan.rs
+++ b/datafusion/core/tests/user_defined_plan.rs
@@ -31,7 +31,7 @@
 //!
 //! ```sql
 //! CREATE EXTERNAL TABLE sales(customer_id VARCHAR, revenue BIGINT)
-//!   STORED AS CSV location 'tests/customer.csv';
+//!   STORED AS CSV location 'tests/data/customer.csv';
 //!
 //! SELECT customer_id, revenue FROM sales ORDER BY revenue DESC limit 3;
 //! ```
@@ -104,7 +104,7 @@ async fn exec_sql(ctx: &mut SessionContext, sql: &str) -> Result<String> {
 
 /// Create a test table.
 async fn setup_table(mut ctx: SessionContext) -> Result<SessionContext> {
-    let sql = "CREATE EXTERNAL TABLE sales(customer_id VARCHAR, revenue BIGINT) STORED AS CSV location 'tests/customer.csv'";
+    let sql = "CREATE EXTERNAL TABLE sales(customer_id VARCHAR, revenue BIGINT) STORED AS CSV location 'tests/data/customer.csv'";
 
     let expected = vec!["++", "++"];
 
@@ -116,7 +116,8 @@ async fn setup_table(mut ctx: SessionContext) -> Result<SessionContext> {
 }
 
 async fn setup_table_without_schemas(mut ctx: SessionContext) -> Result<SessionContext> {
-    let sql = "CREATE EXTERNAL TABLE sales STORED AS CSV location 'tests/customer.csv'";
+    let sql =
+        "CREATE EXTERNAL TABLE sales STORED AS CSV location 'tests/data/customer.csv'";
 
     let expected = vec!["++", "++"];
 
diff --git a/docs/source/user-guide/dataframe.md b/docs/source/user-guide/dataframe.md
index 77768dd8b..23766cd07 100644
--- a/docs/source/user-guide/dataframe.md
+++ b/docs/source/user-guide/dataframe.md
@@ -39,7 +39,7 @@ Here is a minimal example showing the execution of a query using the DataFrame A
 
 ```rust
 let ctx = SessionContext::new();
-let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
 let df = df.filter(col("a").lt_eq(col("b")))?
            .aggregate(vec![col("a")], vec![min(col("b"))])?
            .limit(0, Some(100))?;
diff --git a/docs/source/user-guide/example-usage.md b/docs/source/user-guide/example-usage.md
index 4606aa974..03283a408 100644
--- a/docs/source/user-guide/example-usage.md
+++ b/docs/source/user-guide/example-usage.md
@@ -19,7 +19,7 @@
 
 # Example Usage
 
-In this example some simple processing is performed on the [`example.csv`](../../../datafusion/core/tests/example.csv) file.
+In this example some simple processing is performed on the [`example.csv`](../../../datafusion/core/tests/data/example.csv) file.
 
 ## Update `Cargo.toml`
 
@@ -39,7 +39,7 @@ use datafusion::prelude::*;
 async fn main() -> datafusion::error::Result<()> {
   // register the table
   let ctx = SessionContext::new();
-  ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?;
+  ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
 
   // create a plan to run a SQL query
   let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?;
@@ -59,7 +59,7 @@ use datafusion::prelude::*;
 async fn main() -> datafusion::error::Result<()> {
   // create the dataframe
   let ctx = SessionContext::new();
-  let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+  let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
 
   let df = df.filter(col("a").lt_eq(col("b")))?
            .aggregate(vec![col("a")], vec![min(col("b"))])?
@@ -85,7 +85,7 @@ async fn main() -> datafusion::error::Result<()> {
 
 Please be aware that all identifiers are effectively made lower-case in SQL, so if your csv file has capital letters (ex: `Name`) you must put your column name in double quotes or the examples won't work.
 
-To illustrate this behavior, consider the [`capitalized_example.csv`](../../../datafusion/core/tests/capitalized_example.csv) file:
+To illustrate this behavior, consider the [`capitalized_example.csv`](../../../datafusion/core/tests/data/capitalized_example.csv) file:
 
 ## Run a SQL query against data stored in a CSV:
 
@@ -96,7 +96,7 @@ use datafusion::prelude::*;
 async fn main() -> datafusion::error::Result<()> {
   // register the table
   let ctx = SessionContext::new();
-  ctx.register_csv("example", "tests/capitalized_example.csv", CsvReadOptions::new()).await?;
+  ctx.register_csv("example", "tests/data/capitalized_example.csv", CsvReadOptions::new()).await?;
 
   // create a plan to run a SQL query
   let df = ctx.sql("SELECT \"A\", MIN(b) FROM example GROUP BY \"A\" LIMIT 100").await?;
@@ -116,7 +116,7 @@ use datafusion::prelude::*;
 async fn main() -> datafusion::error::Result<()> {
   // create the dataframe
   let ctx = SessionContext::new();
-  let df = ctx.read_csv("tests/capitalized_example.csv", CsvReadOptions::new()).await?;
+  let df = ctx.read_csv("tests/data/capitalized_example.csv", CsvReadOptions::new()).await?;
 
   let df = df.filter(col("A").lt_eq(col("c")))?
            .aggregate(vec![col("A")], vec![min(col("b"))])?