You are viewing a plain text version of this content; the canonical (hyperlinked) version is available in the HTML view of the mailing-list archive.
Posted to commits@arrow.apache.org by th...@apache.org on 2021/09/15 11:29:00 UTC
[arrow-cookbook] branch main updated: Use as.data.frame instead of
dplyr::collect (#71)
This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-cookbook.git
The following commit(s) were added to refs/heads/main by this push:
new e21720e Use as.data.frame instead of dplyr::collect (#71)
e21720e is described below
commit e21720e6456129cffb2329ad3cc2e46e188c3ca8
Author: Nic <th...@gmail.com>
AuthorDate: Wed Sep 15 11:28:56 2021 +0000
Use as.data.frame instead of dplyr::collect (#71)
* Use as.data.frame instead of dplyr::collect
* Typo
---
r/content/reading_and_writing_data.Rmd | 54 +++++++++++++++++-----------------
1 file changed, 27 insertions(+), 27 deletions(-)
diff --git a/r/content/reading_and_writing_data.Rmd b/r/content/reading_and_writing_data.Rmd
index 13a5832..53671ac 100644
--- a/r/content/reading_and_writing_data.Rmd
+++ b/r/content/reading_and_writing_data.Rmd
@@ -2,44 +2,44 @@
This chapter contains recipes related to reading and writing data using Apache
Arrow. When reading files into R using Apache Arrow, you can choose to read in
-your file as either a `tibble` or as an Arrow Table object.
+your file as either a data frame or as an Arrow Table object.
There are a number of circumstances in which you may want to read in the data as an Arrow Table:
* your dataset is large and if you load it into memory, it may lead to performance issues
* you want faster performance from your `dplyr` queries
* you want to be able to take advantage of Arrow's compute functions
-## Converting from a tibble to an Arrow Table
+## Converting from a data frame to an Arrow Table
-You want to convert an existing `tibble` or `data.frame` into an Arrow Table.
+You want to convert an existing `data.frame` or `tibble` object into an Arrow Table.
### Solution
-```{r, table_create_from_tibble}
+```{r, table_create_from_df}
air_table <- Table$create(airquality)
air_table
```
-```{r, test_table_create_from_tibble, opts.label = "test"}
-test_that("table_create_from_tibble chunk works as expected", {
+```{r, test_table_create_from_df, opts.label = "test"}
+test_that("table_create_from_df chunk works as expected", {
expect_s3_class(air_table, "Table")
})
```
-## Converting data from an Arrow Table to a tibble
+## Converting data from an Arrow Table to a data frame
-You want to convert an Arrow Table to a tibble to view the data or work with it
-in your usual analytics pipeline. You can use either `dplyr::collect()` or
-`as.data.frame()` to do this.
+You want to convert an Arrow Table to a data frame to view the data or work with it
+in your usual analytics pipeline. You can use either `as.data.frame()` or
+`dplyr::collect()` to do this.
### Solution
-```{r, collect_table}
-air_tibble <- dplyr::collect(air_table)
-air_tibble
+```{r, asdf_table}
+air_df <- as.data.frame(air_table)
+air_df
```
-```{r, test_collect_table, opts.label = "test"}
-test_that("collect_table chunk works as expected", {
- expect_identical(air_tibble, airquality)
+```{r, test_asdf_table, opts.label = "test"}
+test_that("asdf_table chunk works as expected", {
+ expect_identical(air_df, airquality)
})
```
@@ -51,7 +51,7 @@ You want to write Parquet files to disk.
```{r, write_parquet}
# Create table
-my_table <- Table$create(tibble::tibble(group = c("A", "B", "C"), score = c(99, 97, 99)))
+my_table <- Table$create(data.frame(group = c("A", "B", "C"), score = c(99, 97, 99)))
# Write to Parquet
write_parquet(my_table, "my_table.parquet")
```
@@ -73,7 +73,7 @@ parquet_tbl
```
```{r, test_read_parquet, opts.label = "test"}
test_that("read_parquet works as expected", {
- expect_identical(parquet_tbl, tibble::tibble(group = c("A", "B", "C"), score = c(99, 97, 99)))
+ expect_identical(parquet_tbl, data.frame(group = c("A", "B", "C"), score = c(99, 97, 99)))
})
```
@@ -128,7 +128,7 @@ You want to specify which columns to include when reading in a Parquet file.
```{r, read_parquet_filter}
# Create table to read back in
-dist_time <- Table$create(tibble::tibble(distance = c(12.2, 15.7, 14.2), time = c(43, 44, 40)))
+dist_time <- Table$create(data.frame(distance = c(12.2, 15.7, 14.2), time = c(43, 44, 40)))
# Write to Parquet
write_parquet(dist_time, "dist_time.parquet")
@@ -138,7 +138,7 @@ time_only
```
```{r, test_read_parquet_filter, opts.label = "test"}
test_that("read_parquet_filter works as expected", {
- expect_identical(time_only, tibble::tibble(time = c(43, 44, 40)))
+ expect_identical(time_only, data.frame(time = c(43, 44, 40)))
})
```
@@ -149,7 +149,7 @@ You want to read in a Feather file.
### Solution
```{r, write_feather}
-my_table <- Table$create(tibble::tibble(group = c("A", "B", "C"), score = c(99, 97, 99)))
+my_table <- Table$create(data.frame(group = c("A", "B", "C"), score = c(99, 97, 99)))
write_feather(my_table, "my_table.arrow")
```
```{r, test_write_feather, opts.label = "test"}
@@ -163,7 +163,7 @@ For legacy support, you can write data in the original Feather format by setting
```{r, write_feather1}
# Create table
-my_table <- Table$create(tibble::tibble(group = c("A", "B", "C"), score = c(99, 97, 99)))
+my_table <- Table$create(data.frame(group = c("A", "B", "C"), score = c(99, 97, 99)))
# Write to Feather format V1
write_feather(mtcars, "my_table.feather", version = 1)
```
@@ -186,7 +186,7 @@ my_feather_tbl <- read_feather("my_table.arrow")
```
```{r, test_read_feather, opts.label = "test"}
test_that("read_feather chunk works as expected", {
- expect_identical(dplyr::collect(my_feather_tbl), tibble::tibble(group = c("A", "B", "C"), score = c(99, 97, 99)))
+ expect_identical(as.data.frame(my_feather_tbl), data.frame(group = c("A", "B", "C"), score = c(99, 97, 99)))
})
unlink("my_table.arrow")
```
@@ -200,7 +200,7 @@ You want to write to the IPC stream format.
```{r, write_ipc_stream}
# Create table
my_table <- Table$create(
- tibble::tibble(
+ data.frame(
group = c("A", "B", "C"),
score = c(99, 97, 99)
)
@@ -226,7 +226,7 @@ my_ipc_stream <- arrow::read_ipc_stream("my_table.arrows")
test_that("read_ipc_stream chunk works as expected", {
expect_equal(
my_ipc_stream,
- tibble::tibble(group = c("A", "B", "C"), score = c(99, 97, 99))
+ data.frame(group = c("A", "B", "C"), score = c(99, 97, 99))
)
})
unlink("my_table.arrows")
@@ -253,7 +253,7 @@ my_csv <- read_csv_arrow("cars.csv", as_data_frame = FALSE)
```{r, test_read_csv_arrow, opts.label = "test"}
test_that("read_csv_arrow chunk works as expected", {
- expect_equivalent(dplyr::collect(my_csv), cars)
+ expect_equivalent(as.data.frame(my_csv), cars)
})
unlink("cars.csv")
```
@@ -281,7 +281,7 @@ countries
test_that("read_json_arrow chunk works as expected", {
expect_equivalent(
countries,
- tibble::tibble(
+ data.frame(
country = c("United Kingdom", "France", "Germany"),
long = c(-3.44, 2.21, 10.45),
lat = c(55.38, 46.23, 51.17)