You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/08/27 08:37:36 UTC
[arrow] branch master updated: ARROW-6338: [R] Type function names don't match type names
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new cef5f36 ARROW-6338: [R] Type function names don't match type names
cef5f36 is described below
commit cef5f36a1eefea9c10c05f2b7bbc171fa6341103
Author: Neal Richardson <ne...@gmail.com>
AuthorDate: Tue Aug 27 10:37:27 2019 +0200
ARROW-6338: [R] Type function names don't match type names
The substance of the change is in https://github.com/apache/arrow/commit/d845cd3e06a72f617ded97385d54dfe722f24302, where I added aliases for four of the type factory functions (`bool()`, `float()`, `halffloat()`, and `string()`), special-cased `double()` in `field()` so that it is accepted as `float64()` without masking the base R function, and improved the error message shown when a type is specified incorrectly. The rest of the changes are a small code reorganization and added documentation for type and schema definition.
Closes #5198 from nealrichardson/type-names and squashes the following commits:
087133db1 <Neal Richardson> Rename test file
40c824616 <Neal Richardson> Add docs for types and schema
640e27d07 <Neal Richardson> Update namespace and collation
d845cd3e0 <Neal Richardson> Add aliases for creating types based on their printed names
4dcf3d645 <Neal Richardson> Finish the move
86afac870 <Neal Richardson> mv R6.R type.R
Authored-by: Neal Richardson <ne...@gmail.com>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
r/DESCRIPTION | 4 +-
r/NAMESPACE | 4 +
r/R/ArrayData.R | 2 +-
r/R/ChunkedArray.R | 2 +-
r/R/Column.R | 2 +-
r/R/Field.R | 11 +-
r/R/List.R | 4 +-
r/R/RecordBatch.R | 2 +-
r/R/RecordBatchReader.R | 2 +-
r/R/RecordBatchWriter.R | 2 +-
r/R/Schema.R | 14 ++-
r/R/Struct.R | 4 +-
r/R/Table.R | 2 +-
r/R/array.R | 2 +-
r/R/arrow-package.R | 28 +++++
r/R/buffer.R | 2 +-
r/R/compression.R | 2 +-
r/R/csv.R | 2 +-
r/R/dictionary.R | 8 +-
r/R/enums.R | 16 +--
r/R/feather.R | 2 +-
r/R/io.R | 2 +-
r/R/json.R | 4 +-
r/R/memory_pool.R | 2 +-
r/R/message.R | 2 +-
r/R/parquet.R | 2 +-
r/R/{R6.R => type.R} | 115 ++++++++++---------
r/_pkgdown.yml | 12 +-
r/man/DataType.Rd | 111 -------------------
r/man/arrow__DataType.Rd | 2 +-
r/man/arrow__FixedWidthType.Rd | 2 +-
r/man/data-type.Rd | 122 +++++++++++++++++++++
r/man/dictionary.Rd | 9 +-
r/man/enums.Rd | 34 ++++++
r/man/schema.Rd | 11 +-
r/man/type.Rd | 2 +-
.../testthat/{test-DataType.R => test-data-type.R} | 0
r/tests/testthat/test-field.R | 4 +
r/tests/testthat/test-schema.R | 7 ++
39 files changed, 339 insertions(+), 221 deletions(-)
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index d2ecde3..7f695e2 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -47,7 +47,8 @@ Suggests:
vctrs
Collate:
'enums.R'
- 'R6.R'
+ 'arrow-package.R'
+ 'type.R'
'ArrayData.R'
'ChunkedArray.R'
'Column.R'
@@ -60,7 +61,6 @@ Collate:
'Struct.R'
'Table.R'
'array.R'
- 'arrow-package.R'
'arrowExports.R'
'buffer.R'
'io.R'
diff --git a/r/NAMESPACE b/r/NAMESPACE
index d97fa4c..5744548 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -110,6 +110,7 @@ export(TimeUnit)
export(Type)
export(array)
export(arrow_available)
+export(bool)
export(boolean)
export(buffer)
export(cast_options)
@@ -128,9 +129,11 @@ export(dictionary)
export(ends_with)
export(everything)
export(field)
+export(float)
export(float16)
export(float32)
export(float64)
+export(halffloat)
export(install_arrow)
export(int16)
export(int32)
@@ -163,6 +166,7 @@ export(read_tsv_arrow)
export(record_batch)
export(schema)
export(starts_with)
+export(string)
export(struct)
export(table)
export(time32)
diff --git a/r/R/ArrayData.R b/r/R/ArrayData.R
index f183179..d9f307b 100644
--- a/r/R/ArrayData.R
+++ b/r/R/ArrayData.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include type.R
#' @title class arrow::ArrayData
#'
diff --git a/r/R/ChunkedArray.R b/r/R/ChunkedArray.R
index fa9aaee..e407a49 100644
--- a/r/R/ChunkedArray.R
+++ b/r/R/ChunkedArray.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#' @title class arrow::ChunkedArray
#'
diff --git a/r/R/Column.R b/r/R/Column.R
index fb8af1e..0487425 100644
--- a/r/R/Column.R
+++ b/r/R/Column.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include type.R
#' @title class arrow::Column
#'
diff --git a/r/R/Field.R b/r/R/Field.R
index d5bdf22..cc2f6cd 100644
--- a/r/R/Field.R
+++ b/r/R/Field.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#' @title class arrow::Field
#'
@@ -72,7 +72,14 @@
#' @export
field <- function(name, type, metadata) {
assert_that(inherits(name, "character"), length(name) == 1L)
- assert_that(inherits(type, "arrow::DataType"))
+ if (!inherits(type, "arrow::DataType")) {
+ if (identical(type, double())) {
+ # Magic so that we don't have to mask this base function
+ type <- float64()
+ } else {
+ stop(name, " must be arrow::DataType, not ", class(type), call. = FALSE)
+ }
+ }
assert_that(missing(metadata), msg = "metadata= is currently ignored")
shared_ptr(`arrow::Field`, Field__initialize(name, type, TRUE))
}
diff --git a/r/R/List.R b/r/R/List.R
index efd8839..a970fb8 100644
--- a/r/R/List.R
+++ b/r/R/List.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include type.R
`arrow::ListType` <- R6Class("arrow::ListType",
inherit = `arrow::NestedType`,
@@ -25,6 +25,6 @@
)
)
-#' @rdname DataType
+#' @rdname data-type
#' @export
list_of <- function(type) shared_ptr(`arrow::ListType`, list__(type))
diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R
index 6446c95..de8b01e 100644
--- a/r/R/RecordBatch.R
+++ b/r/R/RecordBatch.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#' @title class arrow::RecordBatch
#'
diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R
index ae3bd27..6593b0b 100644
--- a/r/R/RecordBatchReader.R
+++ b/r/R/RecordBatchReader.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#' @title class arrow::RecordBatchReader
#'
diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R
index eb0a9c6..7185dc2 100644
--- a/r/R/RecordBatchWriter.R
+++ b/r/R/RecordBatchWriter.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#' @title class arrow::ipc::RecordBatchWriter
#'
diff --git a/r/R/Schema.R b/r/R/Schema.R
index 0d47196..906841b 100644
--- a/r/R/Schema.R
+++ b/r/R/Schema.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#' @title class arrow::Schema
#'
@@ -60,13 +60,19 @@
lhs$Equals(rhs)
}
-#' Schema factory
+#' Create a schema
#'
-#' @param ... named list of data types
+#' This function lets you define a schema for a table. This is useful when you
+#' want to convert an R `data.frame` to Arrow but don't want to rely on the
+#' default mapping of R types to Arrow types, such as when you want to choose a
+#' specific numeric precision.
#'
-#' @return a [schema][arrow__Schema]
+#' @param ... named list of [data types][data-type]
+#'
+#' @return A [schema][arrow__Schema] object.
#'
#' @export
+# TODO (npr): add examples once ARROW-5505 merges
schema <- function(...){
shared_ptr(`arrow::Schema`, schema_(.fields(list2(...))))
}
diff --git a/r/R/Struct.R b/r/R/Struct.R
index 820e1a8..fa35b7e 100644
--- a/r/R/Struct.R
+++ b/r/R/Struct.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include type.R
`arrow::StructType` <- R6Class("arrow::StructType",
inherit = `arrow::NestedType`,
@@ -25,7 +25,7 @@
)
)
-#' @rdname DataType
+#' @rdname data-type
#' @export
struct <- function(...){
xp <- struct_(.fields(list(...)))
diff --git a/r/R/Table.R b/r/R/Table.R
index 15ea48f..1255c8a 100644
--- a/r/R/Table.R
+++ b/r/R/Table.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#'
#' @title class arrow::Table
#'
diff --git a/r/R/array.R b/r/R/array.R
index deb3bc5..fd7c6ef 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#' @title class arrow::Array
#'
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 86e909e..0f0a26b 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -42,3 +42,31 @@ arrow_available <- function() {
option_use_threads <- function() {
!is_false(getOption("arrow.use_threads"))
}
+
+#' @include enums.R
+`arrow::Object` <- R6Class("arrow::Object",
+ public = list(
+ initialize = function(xp) self$set_pointer(xp),
+
+ pointer = function() self$`.:xp:.`,
+ `.:xp:.` = NULL,
+ set_pointer = function(xp){
+ self$`.:xp:.` <- xp
+ },
+ print = function(...){
+ cat(class(self)[[1]], "\n")
+ if (!is.null(self$ToString)){
+ cat(self$ToString(), "\n")
+ }
+ invisible(self)
+ }
+ )
+)
+
+shared_ptr <- function(class, xp) {
+ if (!shared_ptr_is_null(xp)) class$new(xp)
+}
+
+unique_ptr <- function(class, xp) {
+ if (!unique_ptr_is_null(xp)) class$new(xp)
+}
diff --git a/r/R/buffer.R b/r/R/buffer.R
index 327d853..12d0699 100644
--- a/r/R/buffer.R
+++ b/r/R/buffer.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#' @include enums.R
#' @title class arrow::Buffer
diff --git a/r/R/compression.R b/r/R/compression.R
index 399fcb8..6e56a76 100644
--- a/r/R/compression.R
+++ b/r/R/compression.R
@@ -16,7 +16,7 @@
# under the License.
#' @include enums.R
-#' @include R6.R
+#' @include arrow-package.R
#' @include io.R
`arrow::util::Codec` <- R6Class("arrow::util::Codec", inherit = `arrow::Object`)
diff --git a/r/R/csv.R b/r/R/csv.R
index 3c5e5b7..202d571 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -189,7 +189,7 @@ read_tsv_arrow <- function(file,
eval.parent(mc)
}
-#' @include R6.R
+#' @include arrow-package.R
`arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`,
public = list(
diff --git a/r/R/dictionary.R b/r/R/dictionary.R
index bfe2373..9262a51 100644
--- a/r/R/dictionary.R
+++ b/r/R/dictionary.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include type.R
#' @title class arrow::DictionaryType
#'
@@ -40,14 +40,14 @@
)
)
-#' dictionary type factory
+#' Create a dictionary type
#'
#' @param index_type index type, e.g. [int32()]
#' @param value_type value type, probably [utf8()]
#' @param ordered Is this an ordered dictionary ?
#'
-#' @return a [arrow::DictionaryType][arrow__DictionaryType]
-#'
+#' @return An [arrow::DictionaryType][arrow__DictionaryType]
+#' @seealso [Other Arrow data types][data-type]
#' @export
dictionary <- function(index_type, value_type, ordered = FALSE) {
assert_that(
diff --git a/r/R/enums.R b/r/R/enums.R
index e45277b..5c24ce8 100644
--- a/r/R/enums.R
+++ b/r/R/enums.R
@@ -27,17 +27,19 @@ enum <- function(class, ..., .list = list(...)){
)
}
-#' @rdname DataType
+#' Arrow enums
+#' @name enums
#' @export
+#' @keywords internal
TimeUnit <- enum("arrow::TimeUnit::type",
SECOND = 0L, MILLI = 1L, MICRO = 2L, NANO = 3L
)
-#' @rdname DataType
+#' @rdname enums
#' @export
DateUnit <- enum("arrow::DateUnit", DAY = 0L, MILLI = 1L)
-#' @rdname DataType
+#' @rdname enums
#' @export
Type <- enum("arrow::Type::type",
"NA" = 0L, BOOL = 1L, UINT8 = 2L, INT8 = 3L, UINT16 = 4L, INT16 = 5L,
@@ -48,7 +50,7 @@ Type <- enum("arrow::Type::type",
UNION = 25L, DICTIONARY = 26L, MAP = 27L
)
-#' @rdname DataType
+#' @rdname enums
#' @export
StatusCode <- enum("arrow::StatusCode",
OK = 0L, OutOfMemory = 1L, KeyError = 2L, TypeError = 3L,
@@ -59,19 +61,19 @@ StatusCode <- enum("arrow::StatusCode",
PlasmaStoreFull = 22L, PlasmaObjectAlreadySealed = 23L
)
-#' @rdname DataType
+#' @rdname enums
#' @export
FileMode <- enum("arrow::io::FileMode",
READ = 0L, WRITE = 1L, READWRITE = 2L
)
-#' @rdname DataType
+#' @rdname enums
#' @export
MessageType <- enum("arrow::ipc::Message::Type",
NONE = 0L, SCHEMA = 1L, DICTIONARY_BATCH = 2L, RECORD_BATCH = 3L, TENSOR = 4L
)
-#' @rdname DataType
+#' @rdname enums
#' @export
CompressionType <- enum("arrow::Compression::type",
UNCOMPRESSED = 0L, SNAPPY = 1L, GZIP = 2L, BROTLI = 3L, ZSTD = 4L, LZ4 = 5L, LZO = 6L, BZ2 = 7L
diff --git a/r/R/feather.R b/r/R/feather.R
index 48123f7..46c3f5f 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
`arrow::ipc::feather::TableWriter` <- R6Class("arrow::ipc::feather::TableWriter", inherit = `arrow::Object`,
public = list(
diff --git a/r/R/io.R b/r/R/io.R
index 3169a18..59d573f 100644
--- a/r/R/io.R
+++ b/r/R/io.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#' @include enums.R
#' @include buffer.R
diff --git a/r/R/json.R b/r/R/json.R
index 9573ff5..cd43231 100644
--- a/r/R/json.R
+++ b/r/R/json.R
@@ -15,9 +15,9 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
-#' @include R6.R
+#' @include arrow-package.R
#'
#' @title class arrow::json::TableReader
#'
diff --git a/r/R/memory_pool.R b/r/R/memory_pool.R
index 88c2c7b..771e05b 100644
--- a/r/R/memory_pool.R
+++ b/r/R/memory_pool.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#'
#' @title class arrow::MemoryPool
#'
diff --git a/r/R/message.R b/r/R/message.R
index 98d9248..e0add59 100644
--- a/r/R/message.R
+++ b/r/R/message.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
#' @title class arrow::ipc::Message
#'
diff --git a/r/R/parquet.R b/r/R/parquet.R
index c76619c..6f122e5 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include R6.R
+#' @include arrow-package.R
`parquet::arrow::FileReader` <- R6Class("parquet::arrow::FileReader",
inherit = `arrow::Object`,
diff --git a/r/R/R6.R b/r/R/type.R
similarity index 83%
rename from r/R/R6.R
rename to r/R/type.R
index 06dd6f0..b763ed8 100644
--- a/r/R/R6.R
+++ b/r/R/type.R
@@ -15,33 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-#' @include enums.R
-`arrow::Object` <- R6Class("arrow::Object",
- public = list(
- initialize = function(xp) self$set_pointer(xp),
-
- pointer = function() self$`.:xp:.`,
- `.:xp:.` = NULL,
- set_pointer = function(xp){
- self$`.:xp:.` <- xp
- },
- print = function(...){
- cat(class(self)[[1]], "\n")
- if (!is.null(self$ToString)){
- cat(self$ToString(), "\n")
- }
- invisible(self)
- }
- )
-)
-
-shared_ptr <- function(class, xp) {
- if (!shared_ptr_is_null(xp)) class$new(xp)
-}
-
-unique_ptr <- function(class, xp) {
- if (!unique_ptr_is_null(xp)) class$new(xp)
-}
+#' @include arrow-package.R
#' @export
`!=.arrow::Object` <- function(lhs, rhs){
@@ -280,88 +254,121 @@ type.default <- function(x) {
#' Apache Arrow data types
#'
-#' Apache Arrow data types
+#' These functions create type objects corresponding to Arrow types. Use them
+#' when defining a [schema()] or as inputs to other types, like `struct`. Most
+#' of these functions don't take arguments, but a few do.
#'
-#' @param unit time unit
-#' @param timezone time zone
-#' @param precision precision
-#' @param scale scale
-#' @param type type
-#' @param ... ...
+#' A few functions have aliases:
#'
-#' @rdname DataType
+#' * `utf8()` and `string()`
+#' * `float16()` and `halffloat()`
+#' * `float32()` and `float()`
+#' * `bool()` and `boolean()`
+#' * Called from `schema()` or `struct()`, `double()` also is supported as a
+#' way of creating a `float64()`
+#'
+#' @param unit For date/time types, the time unit (day, second, millisecond, etc.)
+#' @param timezone For `timestamp()`, an optional time zone.
+#' @param precision For `decimal()`, precision
+#' @param scale For `decimal()`, scale
+#' @param type For `list_of()`, a data type to make a list-of-type
+#' @param ... For `struct()`, a named list of types to define the struct columns
+#'
+#' @name data-type
#' @export
+#' @seealso [dictionary()] for creating a dictionary (factor-like) type.
+#' @examples
+#' \donttest{
+#' bool()
+#' struct(a = int32(), b = double())
+#' }
int8 <- function() shared_ptr(`arrow::Int8`, Int8__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
int16 <- function() shared_ptr(`arrow::Int16`, Int16__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
int32 <- function() shared_ptr(`arrow::Int32`, Int32__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
int64 <- function() shared_ptr(`arrow::Int64`, Int64__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
uint8 <- function() shared_ptr(`arrow::UInt8`, UInt8__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
uint16 <- function() shared_ptr(`arrow::UInt16`, UInt16__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
uint32 <- function() shared_ptr(`arrow::UInt32`, UInt32__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
uint64 <- function() shared_ptr(`arrow::UInt64`, UInt64__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
float16 <- function() shared_ptr(`arrow::Float16`, Float16__initialize())
-#' @rdname DataType
+#' @rdname data-type
+#' @export
+halffloat <- float16
+
+#' @rdname data-type
#' @export
float32 <- function() shared_ptr(`arrow::Float32`, Float32__initialize())
-#' @rdname DataType
+#' @rdname data-type
+#' @export
+float <- float32
+
+#' @rdname data-type
#' @export
float64 <- function() shared_ptr(`arrow::Float64`, Float64__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
boolean <- function() shared_ptr(`arrow::Boolean`, Boolean__initialize())
-#' @rdname DataType
+#' @rdname data-type
+#' @export
+bool <- boolean
+
+#' @rdname data-type
#' @export
utf8 <- function() shared_ptr(`arrow::Utf8`, Utf8__initialize())
-#' @rdname DataType
+#' @rdname data-type
+#' @export
+string <- utf8
+
+#' @rdname data-type
#' @export
date32 <- function() shared_ptr(`arrow::Date32`, Date32__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
date64 <- function() shared_ptr(`arrow::Date64`, Date64__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
time32 <- function(unit) shared_ptr(`arrow::Time32`, Time32__initialize(unit))
-#' @rdname DataType
+#' @rdname data-type
#' @export
time64 <- function(unit) shared_ptr(`arrow::Time64`, Time64__initialize(unit))
-#' @rdname DataType
+#' @rdname data-type
#' @export
null <- function() shared_ptr(`arrow::Null`, Null__initialize())
-#' @rdname DataType
+#' @rdname data-type
#' @export
timestamp <- function(unit, timezone) {
if (missing(timezone)) {
@@ -371,7 +378,7 @@ timestamp <- function(unit, timezone) {
}
}
-#' @rdname DataType
+#' @rdname data-type
#' @export
decimal <- function(precision, scale) shared_ptr(`arrow::Decimal128Type`, Decimal128Type__initialize(precision, scale))
diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index 552eff9..98baa35 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -66,15 +66,18 @@ reference:
- array
- chunked_array
- record_batch
- - schema
- table
+ - read_message
+ - read_record_batch
+ - read_table
+- title: Arrow data types and schema
+ contents:
+ - schema
- type
- dictionary
- field
- - read_message
- - read_record_batch
- read_schema
- - read_table
+ - data-type
- title: R6 classes
contents:
- arrow__Array
@@ -116,7 +119,6 @@ reference:
- BufferReader
- CompressedInputStream
- CompressedOutputStream
- - TimeUnit
- FeatherTableReader
- FeatherTableWriter
- FileOutputStream
diff --git a/r/man/DataType.Rd b/r/man/DataType.Rd
deleted file mode 100644
index bf5f1d4..0000000
--- a/r/man/DataType.Rd
+++ /dev/null
@@ -1,111 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/enums.R, R/R6.R, R/List.R, R/Struct.R
-\docType{data}
-\name{TimeUnit}
-\alias{TimeUnit}
-\alias{DateUnit}
-\alias{Type}
-\alias{StatusCode}
-\alias{FileMode}
-\alias{MessageType}
-\alias{CompressionType}
-\alias{int8}
-\alias{int16}
-\alias{int32}
-\alias{int64}
-\alias{uint8}
-\alias{uint16}
-\alias{uint32}
-\alias{uint64}
-\alias{float16}
-\alias{float32}
-\alias{float64}
-\alias{boolean}
-\alias{utf8}
-\alias{date32}
-\alias{date64}
-\alias{time32}
-\alias{time64}
-\alias{null}
-\alias{timestamp}
-\alias{decimal}
-\alias{list_of}
-\alias{struct}
-\title{Apache Arrow data types}
-\format{An object of class \code{arrow::TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.}
-\usage{
-TimeUnit
-
-DateUnit
-
-Type
-
-StatusCode
-
-FileMode
-
-MessageType
-
-CompressionType
-
-int8()
-
-int16()
-
-int32()
-
-int64()
-
-uint8()
-
-uint16()
-
-uint32()
-
-uint64()
-
-float16()
-
-float32()
-
-float64()
-
-boolean()
-
-utf8()
-
-date32()
-
-date64()
-
-time32(unit)
-
-time64(unit)
-
-null()
-
-timestamp(unit, timezone)
-
-decimal(precision, scale)
-
-list_of(type)
-
-struct(...)
-}
-\arguments{
-\item{unit}{time unit}
-
-\item{timezone}{time zone}
-
-\item{precision}{precision}
-
-\item{scale}{scale}
-
-\item{type}{type}
-
-\item{...}{...}
-}
-\description{
-Apache Arrow data types
-}
-\keyword{datasets}
diff --git a/r/man/arrow__DataType.Rd b/r/man/arrow__DataType.Rd
index 53bd632..4eeb051 100644
--- a/r/man/arrow__DataType.Rd
+++ b/r/man/arrow__DataType.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/R6.R
+% Please edit documentation in R/type.R
\docType{class}
\name{arrow__DataType}
\alias{arrow__DataType}
diff --git a/r/man/arrow__FixedWidthType.Rd b/r/man/arrow__FixedWidthType.Rd
index 610a400..075c0ee 100644
--- a/r/man/arrow__FixedWidthType.Rd
+++ b/r/man/arrow__FixedWidthType.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/R6.R
+% Please edit documentation in R/type.R
\docType{class}
\name{arrow__FixedWidthType}
\alias{arrow__FixedWidthType}
diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd
new file mode 100644
index 0000000..316ff49
--- /dev/null
+++ b/r/man/data-type.Rd
@@ -0,0 +1,122 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/type.R, R/List.R, R/Struct.R
+\name{data-type}
+\alias{data-type}
+\alias{int8}
+\alias{int16}
+\alias{int32}
+\alias{int64}
+\alias{uint8}
+\alias{uint16}
+\alias{uint32}
+\alias{uint64}
+\alias{float16}
+\alias{halffloat}
+\alias{float32}
+\alias{float}
+\alias{float64}
+\alias{boolean}
+\alias{bool}
+\alias{utf8}
+\alias{string}
+\alias{date32}
+\alias{date64}
+\alias{time32}
+\alias{time64}
+\alias{null}
+\alias{timestamp}
+\alias{decimal}
+\alias{list_of}
+\alias{struct}
+\title{Apache Arrow data types}
+\usage{
+int8()
+
+int16()
+
+int32()
+
+int64()
+
+uint8()
+
+uint16()
+
+uint32()
+
+uint64()
+
+float16()
+
+halffloat()
+
+float32()
+
+float()
+
+float64()
+
+boolean()
+
+bool()
+
+utf8()
+
+string()
+
+date32()
+
+date64()
+
+time32(unit)
+
+time64(unit)
+
+null()
+
+timestamp(unit, timezone)
+
+decimal(precision, scale)
+
+list_of(type)
+
+struct(...)
+}
+\arguments{
+\item{unit}{For date/time types, the time unit (day, second, millisecond, etc.)}
+
+\item{timezone}{For \code{timestamp()}, an optional time zone.}
+
+\item{precision}{For \code{decimal()}, precision}
+
+\item{scale}{For \code{decimal()}, scale}
+
+\item{type}{For \code{list_of()}, a data type to make a list-of-type}
+
+\item{...}{For \code{struct()}, a named list of types to define the struct columns}
+}
+\description{
+These functions create type objects corresponding to Arrow types. Use them
+when defining a \code{\link[=schema]{schema()}} or as inputs to other types, like \code{struct}. Most
+of these functions don't take arguments, but a few do.
+}
+\details{
+A few functions have aliases:
+\itemize{
+\item \code{utf8()} and \code{string()}
+\item \code{float16()} and \code{halffloat()}
+\item \code{float32()} and \code{float()}
+\item \code{bool()} and \code{boolean()}
+\item Called from \code{schema()} or \code{struct()}, \code{double()} also is supported as a
+way of creating a \code{float64()}
+}
+}
+\examples{
+\donttest{
+bool()
+struct(a = int32(), b = double())
+}
+}
+\seealso{
+\code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type.
+}
diff --git a/r/man/dictionary.Rd b/r/man/dictionary.Rd
index 9662328..334d67e 100644
--- a/r/man/dictionary.Rd
+++ b/r/man/dictionary.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/dictionary.R
\name{dictionary}
\alias{dictionary}
-\title{dictionary type factory}
+\title{Create a dictionary type}
\usage{
dictionary(index_type, value_type, ordered = FALSE)
}
@@ -14,8 +14,11 @@ dictionary(index_type, value_type, ordered = FALSE)
\item{ordered}{Is this an ordered dictionary ?}
}
\value{
-a \link[=arrow__DictionaryType]{arrow::DictionaryType}
+An \link[=arrow__DictionaryType]{arrow::DictionaryType}
}
\description{
-dictionary type factory
+Create a dictionary type
+}
+\seealso{
+\link[=data-type]{Other Arrow data types}
}
diff --git a/r/man/enums.Rd b/r/man/enums.Rd
new file mode 100644
index 0000000..c55170e
--- /dev/null
+++ b/r/man/enums.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/enums.R
+\docType{data}
+\name{enums}
+\alias{enums}
+\alias{TimeUnit}
+\alias{DateUnit}
+\alias{Type}
+\alias{StatusCode}
+\alias{FileMode}
+\alias{MessageType}
+\alias{CompressionType}
+\title{Arrow enums}
+\format{An object of class \code{arrow::TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.}
+\usage{
+TimeUnit
+
+DateUnit
+
+Type
+
+StatusCode
+
+FileMode
+
+MessageType
+
+CompressionType
+}
+\description{
+Arrow enums
+}
+\keyword{datasets}
+\keyword{internal}
diff --git a/r/man/schema.Rd b/r/man/schema.Rd
index ad3bcb1..622e5a7 100644
--- a/r/man/schema.Rd
+++ b/r/man/schema.Rd
@@ -2,16 +2,19 @@
% Please edit documentation in R/Schema.R
\name{schema}
\alias{schema}
-\title{Schema factory}
+\title{Create a schema}
\usage{
schema(...)
}
\arguments{
-\item{...}{named list of data types}
+\item{...}{named list of \link[=data-type]{data types}}
}
\value{
-a \link[=arrow__Schema]{schema}
+A \link[=arrow__Schema]{schema} object.
}
\description{
-Schema factory
+This function lets you define a schema for a table. This is useful when you
+want to convert an R \code{data.frame} to Arrow but don't want to rely on the
+default mapping of R types to Arrow types, such as when you want to choose a
+specific numeric precision.
}
diff --git a/r/man/type.Rd b/r/man/type.Rd
index 3e2b4f4..2f85e4a 100644
--- a/r/man/type.Rd
+++ b/r/man/type.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/R6.R
+% Please edit documentation in R/type.R
\name{type}
\alias{type}
\title{infer the arrow Array type from an R vector}
diff --git a/r/tests/testthat/test-DataType.R b/r/tests/testthat/test-data-type.R
similarity index 100%
rename from r/tests/testthat/test-DataType.R
rename to r/tests/testthat/test-data-type.R
diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R
index aaa2875..5d63a7f 100644
--- a/r/tests/testthat/test-field.R
+++ b/r/tests/testthat/test-field.R
@@ -24,3 +24,7 @@ test_that("field() factory", {
expect_true(x == x)
expect_false(x == field("x", int64()))
})
+
+test_that("Field validation", {
+ expect_error(schema(b = 32), "b must be arrow::DataType, not numeric")
+})
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index ff40b81..387aee9 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -17,6 +17,13 @@
context("arrow::Schema")
+test_that("Alternate type names are supported", {
+ expect_equal(
+ schema(b = double(), c = bool(), d = string(), e = float(), f = halffloat()),
+ schema(b = float64(), c = boolean(), d = utf8(), e = float32(), f = float16())
+ )
+})
+
test_that("reading schema from Buffer", {
# TODO: this uses the streaming format, i.e. from RecordBatchStreamWriter
# maybe there is an easier way to serialize a schema