You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/08/27 08:37:36 UTC

[arrow] branch master updated: ARROW-6338: [R] Type function names don't match type names

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new cef5f36  ARROW-6338: [R] Type function names don't match type names
cef5f36 is described below

commit cef5f36a1eefea9c10c05f2b7bbc171fa6341103
Author: Neal Richardson <ne...@gmail.com>
AuthorDate: Tue Aug 27 10:37:27 2019 +0200

    ARROW-6338: [R] Type function names don't match type names
    
    The substance of the change is in https://github.com/apache/arrow/commit/d845cd3e06a72f617ded97385d54dfe722f24302, where I added aliases for 4 of the type factory functions, added some magic to make `double()` work as expected without masking the base R function, and also improved the error message for when you specify a type incorrectly. The rest of the changes are a little code reorganization and adding documentation for type and schema definition.
    
    Closes #5198 from nealrichardson/type-names and squashes the following commits:
    
    087133db1 <Neal Richardson> Rename test file
    40c824616 <Neal Richardson> Add docs for types and schema
    640e27d07 <Neal Richardson> Update namespace and collation
    d845cd3e0 <Neal Richardson> Add aliases for creating types based on their printed names
    4dcf3d645 <Neal Richardson> Finish the move
    86afac870 <Neal Richardson> mv R6.R type.R
    
    Authored-by: Neal Richardson <ne...@gmail.com>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 r/DESCRIPTION                                      |   4 +-
 r/NAMESPACE                                        |   4 +
 r/R/ArrayData.R                                    |   2 +-
 r/R/ChunkedArray.R                                 |   2 +-
 r/R/Column.R                                       |   2 +-
 r/R/Field.R                                        |  11 +-
 r/R/List.R                                         |   4 +-
 r/R/RecordBatch.R                                  |   2 +-
 r/R/RecordBatchReader.R                            |   2 +-
 r/R/RecordBatchWriter.R                            |   2 +-
 r/R/Schema.R                                       |  14 ++-
 r/R/Struct.R                                       |   4 +-
 r/R/Table.R                                        |   2 +-
 r/R/array.R                                        |   2 +-
 r/R/arrow-package.R                                |  28 +++++
 r/R/buffer.R                                       |   2 +-
 r/R/compression.R                                  |   2 +-
 r/R/csv.R                                          |   2 +-
 r/R/dictionary.R                                   |   8 +-
 r/R/enums.R                                        |  16 +--
 r/R/feather.R                                      |   2 +-
 r/R/io.R                                           |   2 +-
 r/R/json.R                                         |   4 +-
 r/R/memory_pool.R                                  |   2 +-
 r/R/message.R                                      |   2 +-
 r/R/parquet.R                                      |   2 +-
 r/R/{R6.R => type.R}                               | 115 ++++++++++---------
 r/_pkgdown.yml                                     |  12 +-
 r/man/DataType.Rd                                  | 111 -------------------
 r/man/arrow__DataType.Rd                           |   2 +-
 r/man/arrow__FixedWidthType.Rd                     |   2 +-
 r/man/data-type.Rd                                 | 122 +++++++++++++++++++++
 r/man/dictionary.Rd                                |   9 +-
 r/man/enums.Rd                                     |  34 ++++++
 r/man/schema.Rd                                    |  11 +-
 r/man/type.Rd                                      |   2 +-
 .../testthat/{test-DataType.R => test-data-type.R} |   0
 r/tests/testthat/test-field.R                      |   4 +
 r/tests/testthat/test-schema.R                     |   7 ++
 39 files changed, 339 insertions(+), 221 deletions(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index d2ecde3..7f695e2 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -47,7 +47,8 @@ Suggests:
     vctrs
 Collate:
     'enums.R'
-    'R6.R'
+    'arrow-package.R'
+    'type.R'
     'ArrayData.R'
     'ChunkedArray.R'
     'Column.R'
@@ -60,7 +61,6 @@ Collate:
     'Struct.R'
     'Table.R'
     'array.R'
-    'arrow-package.R'
     'arrowExports.R'
     'buffer.R'
     'io.R'
diff --git a/r/NAMESPACE b/r/NAMESPACE
index d97fa4c..5744548 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -110,6 +110,7 @@ export(TimeUnit)
 export(Type)
 export(array)
 export(arrow_available)
+export(bool)
 export(boolean)
 export(buffer)
 export(cast_options)
@@ -128,9 +129,11 @@ export(dictionary)
 export(ends_with)
 export(everything)
 export(field)
+export(float)
 export(float16)
 export(float32)
 export(float64)
+export(halffloat)
 export(install_arrow)
 export(int16)
 export(int32)
@@ -163,6 +166,7 @@ export(read_tsv_arrow)
 export(record_batch)
 export(schema)
 export(starts_with)
+export(string)
 export(struct)
 export(table)
 export(time32)
diff --git a/r/R/ArrayData.R b/r/R/ArrayData.R
index f183179..d9f307b 100644
--- a/r/R/ArrayData.R
+++ b/r/R/ArrayData.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include type.R
 
 #' @title class arrow::ArrayData
 #'
diff --git a/r/R/ChunkedArray.R b/r/R/ChunkedArray.R
index fa9aaee..e407a49 100644
--- a/r/R/ChunkedArray.R
+++ b/r/R/ChunkedArray.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 
 #' @title class arrow::ChunkedArray
 #'
diff --git a/r/R/Column.R b/r/R/Column.R
index fb8af1e..0487425 100644
--- a/r/R/Column.R
+++ b/r/R/Column.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include type.R
 
 #' @title class arrow::Column
 #'
diff --git a/r/R/Field.R b/r/R/Field.R
index d5bdf22..cc2f6cd 100644
--- a/r/R/Field.R
+++ b/r/R/Field.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 
 #' @title class arrow::Field
 #'
@@ -72,7 +72,14 @@
 #' @export
 field <- function(name, type, metadata) {
   assert_that(inherits(name, "character"), length(name) == 1L)
-  assert_that(inherits(type, "arrow::DataType"))
+  if (identical(type, double())) {
+    # base::double() returns numeric(0); map it to float64() so we need not mask the base function
+    type <- float64()
+  }
+  if (!inherits(type, "arrow::DataType")) {
+    # Report only the primary class: stop() pastes a multi-class vector together with no separator
+    stop(name, " must be arrow::DataType, not ", class(type)[[1]], call. = FALSE)
+  }
   assert_that(missing(metadata), msg = "metadata= is currently ignored")
   shared_ptr(`arrow::Field`, Field__initialize(name, type, TRUE))
 }
diff --git a/r/R/List.R b/r/R/List.R
index efd8839..a970fb8 100644
--- a/r/R/List.R
+++ b/r/R/List.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include type.R
 
 `arrow::ListType` <- R6Class("arrow::ListType",
   inherit = `arrow::NestedType`,
@@ -25,6 +25,6 @@
   )
 )
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 list_of <- function(type) shared_ptr(`arrow::ListType`, list__(type))
diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R
index 6446c95..de8b01e 100644
--- a/r/R/RecordBatch.R
+++ b/r/R/RecordBatch.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 
 #' @title class arrow::RecordBatch
 #'
diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R
index ae3bd27..6593b0b 100644
--- a/r/R/RecordBatchReader.R
+++ b/r/R/RecordBatchReader.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 
 #' @title class arrow::RecordBatchReader
 #'
diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R
index eb0a9c6..7185dc2 100644
--- a/r/R/RecordBatchWriter.R
+++ b/r/R/RecordBatchWriter.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 
 #' @title class arrow::ipc::RecordBatchWriter
 #'
diff --git a/r/R/Schema.R b/r/R/Schema.R
index 0d47196..906841b 100644
--- a/r/R/Schema.R
+++ b/r/R/Schema.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 
 #' @title class arrow::Schema
 #'
@@ -60,13 +60,19 @@
   lhs$Equals(rhs)
 }
 
-#' Schema factory
+#' Create a schema
 #'
-#' @param ... named list of data types
+#' This function lets you define a schema for a table. This is useful when you
+#' want to convert an R `data.frame` to Arrow but don't want to rely on the
+#' default mapping of R types to Arrow types, such as when you want to choose a
+#' specific numeric precision.
 #'
-#' @return a [schema][arrow__Schema]
+#' @param ... named list of [data types][data-type]
+#'
+#' @return A [schema][arrow__Schema] object.
 #'
 #' @export
+# TODO (npr): add examples once ARROW-5505 merges
 schema <- function(...){
   shared_ptr(`arrow::Schema`, schema_(.fields(list2(...))))
 }
diff --git a/r/R/Struct.R b/r/R/Struct.R
index 820e1a8..fa35b7e 100644
--- a/r/R/Struct.R
+++ b/r/R/Struct.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include type.R
 
 `arrow::StructType` <- R6Class("arrow::StructType",
   inherit = `arrow::NestedType`,
@@ -25,7 +25,7 @@
   )
 )
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 struct <- function(...){
   xp <- struct_(.fields(list(...)))
diff --git a/r/R/Table.R b/r/R/Table.R
index 15ea48f..1255c8a 100644
--- a/r/R/Table.R
+++ b/r/R/Table.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 #'
 #' @title class arrow::Table
 #'
diff --git a/r/R/array.R b/r/R/array.R
index deb3bc5..fd7c6ef 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 
 #' @title class arrow::Array
 #'
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 86e909e..0f0a26b 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -42,3 +42,31 @@ arrow_available <- function() {
 option_use_threads <- function() {
   !is_false(getOption("arrow.use_threads"))
 }
+
+#' @include enums.R
+`arrow::Object` <- R6Class("arrow::Object",
+  public = list(
+    initialize = function(xp) self$set_pointer(xp),
+
+    pointer = function() self$`.:xp:.`,
+    `.:xp:.` = NULL,
+    set_pointer = function(xp){
+      self$`.:xp:.` <- xp
+    },
+    print = function(...){
+      cat(class(self)[[1]], "\n")
+      if (!is.null(self$ToString)){
+        cat(self$ToString(), "\n")
+      }
+      invisible(self)
+    }
+  )
+)
+
+shared_ptr <- function(class, xp) {
+  if (!shared_ptr_is_null(xp)) class$new(xp)
+}
+
+unique_ptr <- function(class, xp) {
+  if (!unique_ptr_is_null(xp)) class$new(xp)
+}
diff --git a/r/R/buffer.R b/r/R/buffer.R
index 327d853..12d0699 100644
--- a/r/R/buffer.R
+++ b/r/R/buffer.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 #' @include enums.R
 
 #' @title class arrow::Buffer
diff --git a/r/R/compression.R b/r/R/compression.R
index 399fcb8..6e56a76 100644
--- a/r/R/compression.R
+++ b/r/R/compression.R
@@ -16,7 +16,7 @@
 # under the License.
 
 #' @include enums.R
-#' @include R6.R
+#' @include arrow-package.R
 #' @include io.R
 
 `arrow::util::Codec` <- R6Class("arrow::util::Codec", inherit = `arrow::Object`)
diff --git a/r/R/csv.R b/r/R/csv.R
index 3c5e5b7..202d571 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -189,7 +189,7 @@ read_tsv_arrow <- function(file,
   eval.parent(mc)
 }
 
-#' @include R6.R
+#' @include arrow-package.R
 
 `arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`,
   public = list(
diff --git a/r/R/dictionary.R b/r/R/dictionary.R
index bfe2373..9262a51 100644
--- a/r/R/dictionary.R
+++ b/r/R/dictionary.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include type.R
 
 #' @title class arrow::DictionaryType
 #'
@@ -40,14 +40,14 @@
   )
 )
 
-#' dictionary type factory
+#' Create a dictionary type
 #'
 #' @param index_type index type, e.g. [int32()]
 #' @param value_type value type, probably [utf8()]
 #' @param ordered Is this an ordered dictionary ?
 #'
-#' @return a [arrow::DictionaryType][arrow__DictionaryType]
-#'
+#' @return An [arrow::DictionaryType][arrow__DictionaryType]
+#' @seealso [Other Arrow data types][data-type]
 #' @export
 dictionary <- function(index_type, value_type, ordered = FALSE) {
   assert_that(
diff --git a/r/R/enums.R b/r/R/enums.R
index e45277b..5c24ce8 100644
--- a/r/R/enums.R
+++ b/r/R/enums.R
@@ -27,17 +27,19 @@ enum <- function(class, ..., .list = list(...)){
   )
 }
 
-#' @rdname DataType
+#' Arrow enums
+#' @name enums
 #' @export
+#' @keywords internal
 TimeUnit <- enum("arrow::TimeUnit::type",
   SECOND = 0L, MILLI = 1L, MICRO = 2L, NANO = 3L
 )
 
-#' @rdname DataType
+#' @rdname enums
 #' @export
 DateUnit <- enum("arrow::DateUnit", DAY = 0L, MILLI = 1L)
 
-#' @rdname DataType
+#' @rdname enums
 #' @export
 Type <- enum("arrow::Type::type",
   "NA" = 0L, BOOL = 1L, UINT8 = 2L, INT8 = 3L, UINT16 = 4L, INT16 = 5L,
@@ -48,7 +50,7 @@ Type <- enum("arrow::Type::type",
   UNION = 25L, DICTIONARY = 26L, MAP = 27L
 )
 
-#' @rdname DataType
+#' @rdname enums
 #' @export
 StatusCode <- enum("arrow::StatusCode",
   OK = 0L, OutOfMemory = 1L, KeyError = 2L, TypeError = 3L,
@@ -59,19 +61,19 @@ StatusCode <- enum("arrow::StatusCode",
   PlasmaStoreFull = 22L, PlasmaObjectAlreadySealed = 23L
 )
 
-#' @rdname DataType
+#' @rdname enums
 #' @export
 FileMode <- enum("arrow::io::FileMode",
   READ = 0L, WRITE = 1L, READWRITE = 2L
 )
 
-#' @rdname DataType
+#' @rdname enums
 #' @export
 MessageType <- enum("arrow::ipc::Message::Type",
   NONE = 0L, SCHEMA = 1L, DICTIONARY_BATCH = 2L, RECORD_BATCH = 3L, TENSOR = 4L
 )
 
-#' @rdname DataType
+#' @rdname enums
 #' @export
 CompressionType <- enum("arrow::Compression::type",
   UNCOMPRESSED = 0L, SNAPPY = 1L, GZIP = 2L, BROTLI = 3L, ZSTD = 4L, LZ4 = 5L, LZO = 6L, BZ2 = 7L
diff --git a/r/R/feather.R b/r/R/feather.R
index 48123f7..46c3f5f 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 
 `arrow::ipc::feather::TableWriter` <- R6Class("arrow::ipc::feather::TableWriter", inherit = `arrow::Object`,
   public = list(
diff --git a/r/R/io.R b/r/R/io.R
index 3169a18..59d573f 100644
--- a/r/R/io.R
+++ b/r/R/io.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 #' @include enums.R
 #' @include buffer.R
 
diff --git a/r/R/json.R b/r/R/json.R
index 9573ff5..cd43231 100644
--- a/r/R/json.R
+++ b/r/R/json.R
@@ -15,9 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 
-#' @include R6.R
+#' @include arrow-package.R
 #'
 #' @title class arrow::json::TableReader
 #'
diff --git a/r/R/memory_pool.R b/r/R/memory_pool.R
index 88c2c7b..771e05b 100644
--- a/r/R/memory_pool.R
+++ b/r/R/memory_pool.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 #'
 #' @title class arrow::MemoryPool
 #'
diff --git a/r/R/message.R b/r/R/message.R
index 98d9248..e0add59 100644
--- a/r/R/message.R
+++ b/r/R/message.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 
 #' @title class arrow::ipc::Message
 #'
diff --git a/r/R/parquet.R b/r/R/parquet.R
index c76619c..6f122e5 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include R6.R
+#' @include arrow-package.R
 
 `parquet::arrow::FileReader` <- R6Class("parquet::arrow::FileReader",
   inherit = `arrow::Object`,
diff --git a/r/R/R6.R b/r/R/type.R
similarity index 83%
rename from r/R/R6.R
rename to r/R/type.R
index 06dd6f0..b763ed8 100644
--- a/r/R/R6.R
+++ b/r/R/type.R
@@ -15,33 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include enums.R
-`arrow::Object` <- R6Class("arrow::Object",
-  public = list(
-    initialize = function(xp) self$set_pointer(xp),
-
-    pointer = function() self$`.:xp:.`,
-    `.:xp:.` = NULL,
-    set_pointer = function(xp){
-      self$`.:xp:.` <- xp
-    },
-    print = function(...){
-      cat(class(self)[[1]], "\n")
-      if (!is.null(self$ToString)){
-        cat(self$ToString(), "\n")
-      }
-      invisible(self)
-    }
-  )
-)
-
-shared_ptr <- function(class, xp) {
-  if (!shared_ptr_is_null(xp)) class$new(xp)
-}
-
-unique_ptr <- function(class, xp) {
-  if (!unique_ptr_is_null(xp)) class$new(xp)
-}
+#' @include arrow-package.R
 
 #' @export
 `!=.arrow::Object` <- function(lhs, rhs){
@@ -280,88 +254,121 @@ type.default <- function(x) {
 
 #' Apache Arrow data types
 #'
-#' Apache Arrow data types
+#' These functions create type objects corresponding to Arrow types. Use them
+#' when defining a [schema()] or as inputs to other types, like `struct`. Most
+#' of these functions don't take arguments, but a few do.
 #'
-#' @param unit time unit
-#' @param timezone time zone
-#' @param precision precision
-#' @param scale scale
-#' @param type type
-#' @param ... ...
+#' A few functions have aliases:
 #'
-#' @rdname DataType
+#' * `utf8()` and `string()`
+#' * `float16()` and `halffloat()`
+#' * `float32()` and `float()`
+#' * `bool()` and `boolean()`
+#' * Called from `schema()` or `struct()`, `double()` also is supported as a
+#' way of creating a `float64()`
+#'
+#' @param unit For date/time types, the time unit (day, second, millisecond, etc.)
+#' @param timezone For `timestamp()`, an optional time zone.
+#' @param precision For `decimal()`, precision
+#' @param scale For `decimal()`, scale
+#' @param type For `list_of()`, a data type to make a list-of-type
+#' @param ... For `struct()`, a named list of types to define the struct columns
+#'
+#' @name data-type
 #' @export
+#' @seealso [dictionary()] for creating a dictionary (factor-like) type.
+#' @examples
+#' \donttest{
+#' bool()
+#' struct(a = int32(), b = double())
+#' }
 int8 <- function() shared_ptr(`arrow::Int8`, Int8__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 int16 <- function() shared_ptr(`arrow::Int16`, Int16__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 int32 <- function() shared_ptr(`arrow::Int32`, Int32__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 int64 <- function() shared_ptr(`arrow::Int64`, Int64__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 uint8 <- function() shared_ptr(`arrow::UInt8`, UInt8__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 uint16 <- function() shared_ptr(`arrow::UInt16`, UInt16__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 uint32 <- function() shared_ptr(`arrow::UInt32`, UInt32__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 uint64 <- function() shared_ptr(`arrow::UInt64`, UInt64__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 float16 <- function() shared_ptr(`arrow::Float16`,  Float16__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
+#' @export
+halffloat <- float16
+
+#' @rdname data-type
 #' @export
 float32 <- function() shared_ptr(`arrow::Float32`, Float32__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
+#' @export
+float <- float32
+
+#' @rdname data-type
 #' @export
 float64 <- function() shared_ptr(`arrow::Float64`, Float64__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 boolean <- function() shared_ptr(`arrow::Boolean`, Boolean__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
+#' @export
+bool <- boolean
+
+#' @rdname data-type
 #' @export
 utf8 <- function() shared_ptr(`arrow::Utf8`, Utf8__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
+#' @export
+string <- utf8
+
+#' @rdname data-type
 #' @export
 date32 <- function() shared_ptr(`arrow::Date32`, Date32__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 date64 <- function() shared_ptr(`arrow::Date64`, Date64__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 time32 <- function(unit) shared_ptr(`arrow::Time32`, Time32__initialize(unit))
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 time64 <- function(unit) shared_ptr(`arrow::Time64`, Time64__initialize(unit))
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 null <- function() shared_ptr(`arrow::Null`, Null__initialize())
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 timestamp <- function(unit, timezone) {
   if (missing(timezone)) {
@@ -371,7 +378,7 @@ timestamp <- function(unit, timezone) {
   }
 }
 
-#' @rdname DataType
+#' @rdname data-type
 #' @export
 decimal <- function(precision, scale) shared_ptr(`arrow::Decimal128Type`, Decimal128Type__initialize(precision, scale))
 
diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index 552eff9..98baa35 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -66,15 +66,18 @@ reference:
   - array
   - chunked_array
   - record_batch
-  - schema
   - table
+  - read_message
+  - read_record_batch
+  - read_table
+- title: Arrow data types and schema
+  contents:
+  - schema
   - type
   - dictionary
   - field
-  - read_message
-  - read_record_batch
   - read_schema
-  - read_table
+  - data-type
 - title: R6 classes
   contents:
   - arrow__Array
@@ -116,7 +119,6 @@ reference:
   - BufferReader
   - CompressedInputStream
   - CompressedOutputStream
-  - TimeUnit
   - FeatherTableReader
   - FeatherTableWriter
   - FileOutputStream
diff --git a/r/man/DataType.Rd b/r/man/DataType.Rd
deleted file mode 100644
index bf5f1d4..0000000
--- a/r/man/DataType.Rd
+++ /dev/null
@@ -1,111 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/enums.R, R/R6.R, R/List.R, R/Struct.R
-\docType{data}
-\name{TimeUnit}
-\alias{TimeUnit}
-\alias{DateUnit}
-\alias{Type}
-\alias{StatusCode}
-\alias{FileMode}
-\alias{MessageType}
-\alias{CompressionType}
-\alias{int8}
-\alias{int16}
-\alias{int32}
-\alias{int64}
-\alias{uint8}
-\alias{uint16}
-\alias{uint32}
-\alias{uint64}
-\alias{float16}
-\alias{float32}
-\alias{float64}
-\alias{boolean}
-\alias{utf8}
-\alias{date32}
-\alias{date64}
-\alias{time32}
-\alias{time64}
-\alias{null}
-\alias{timestamp}
-\alias{decimal}
-\alias{list_of}
-\alias{struct}
-\title{Apache Arrow data types}
-\format{An object of class \code{arrow::TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.}
-\usage{
-TimeUnit
-
-DateUnit
-
-Type
-
-StatusCode
-
-FileMode
-
-MessageType
-
-CompressionType
-
-int8()
-
-int16()
-
-int32()
-
-int64()
-
-uint8()
-
-uint16()
-
-uint32()
-
-uint64()
-
-float16()
-
-float32()
-
-float64()
-
-boolean()
-
-utf8()
-
-date32()
-
-date64()
-
-time32(unit)
-
-time64(unit)
-
-null()
-
-timestamp(unit, timezone)
-
-decimal(precision, scale)
-
-list_of(type)
-
-struct(...)
-}
-\arguments{
-\item{unit}{time unit}
-
-\item{timezone}{time zone}
-
-\item{precision}{precision}
-
-\item{scale}{scale}
-
-\item{type}{type}
-
-\item{...}{...}
-}
-\description{
-Apache Arrow data types
-}
-\keyword{datasets}
diff --git a/r/man/arrow__DataType.Rd b/r/man/arrow__DataType.Rd
index 53bd632..4eeb051 100644
--- a/r/man/arrow__DataType.Rd
+++ b/r/man/arrow__DataType.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/R6.R
+% Please edit documentation in R/type.R
 \docType{class}
 \name{arrow__DataType}
 \alias{arrow__DataType}
diff --git a/r/man/arrow__FixedWidthType.Rd b/r/man/arrow__FixedWidthType.Rd
index 610a400..075c0ee 100644
--- a/r/man/arrow__FixedWidthType.Rd
+++ b/r/man/arrow__FixedWidthType.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/R6.R
+% Please edit documentation in R/type.R
 \docType{class}
 \name{arrow__FixedWidthType}
 \alias{arrow__FixedWidthType}
diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd
new file mode 100644
index 0000000..316ff49
--- /dev/null
+++ b/r/man/data-type.Rd
@@ -0,0 +1,122 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/type.R, R/List.R, R/Struct.R
+\name{data-type}
+\alias{data-type}
+\alias{int8}
+\alias{int16}
+\alias{int32}
+\alias{int64}
+\alias{uint8}
+\alias{uint16}
+\alias{uint32}
+\alias{uint64}
+\alias{float16}
+\alias{halffloat}
+\alias{float32}
+\alias{float}
+\alias{float64}
+\alias{boolean}
+\alias{bool}
+\alias{utf8}
+\alias{string}
+\alias{date32}
+\alias{date64}
+\alias{time32}
+\alias{time64}
+\alias{null}
+\alias{timestamp}
+\alias{decimal}
+\alias{list_of}
+\alias{struct}
+\title{Apache Arrow data types}
+\usage{
+int8()
+
+int16()
+
+int32()
+
+int64()
+
+uint8()
+
+uint16()
+
+uint32()
+
+uint64()
+
+float16()
+
+halffloat()
+
+float32()
+
+float()
+
+float64()
+
+boolean()
+
+bool()
+
+utf8()
+
+string()
+
+date32()
+
+date64()
+
+time32(unit)
+
+time64(unit)
+
+null()
+
+timestamp(unit, timezone)
+
+decimal(precision, scale)
+
+list_of(type)
+
+struct(...)
+}
+\arguments{
+\item{unit}{For date/time types, the time unit (day, second, millisecond, etc.)}
+
+\item{timezone}{For \code{timestamp()}, an optional time zone.}
+
+\item{precision}{For \code{decimal()}, precision}
+
+\item{scale}{For \code{decimal()}, scale}
+
+\item{type}{For \code{list_of()}, a data type to make a list-of-type}
+
+\item{...}{For \code{struct()}, a named list of types to define the struct columns}
+}
+\description{
+These functions create type objects corresponding to Arrow types. Use them
+when defining a \code{\link[=schema]{schema()}} or as inputs to other types, like \code{struct}. Most
+of these functions don't take arguments, but a few do.
+}
+\details{
+A few functions have aliases:
+\itemize{
+\item \code{utf8()} and \code{string()}
+\item \code{float16()} and \code{halffloat()}
+\item \code{float32()} and \code{float()}
+\item \code{bool()} and \code{boolean()}
+\item Called from \code{schema()} or \code{struct()}, \code{double()} also is supported as a
+way of creating a \code{float64()}
+}
+}
+\examples{
+\donttest{
+bool()
+struct(a = int32(), b = double())
+}
+}
+\seealso{
+\code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type.
+}
diff --git a/r/man/dictionary.Rd b/r/man/dictionary.Rd
index 9662328..334d67e 100644
--- a/r/man/dictionary.Rd
+++ b/r/man/dictionary.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/dictionary.R
 \name{dictionary}
 \alias{dictionary}
-\title{dictionary type factory}
+\title{Create a dictionary type}
 \usage{
 dictionary(index_type, value_type, ordered = FALSE)
 }
@@ -14,8 +14,11 @@ dictionary(index_type, value_type, ordered = FALSE)
 \item{ordered}{Is this an ordered dictionary ?}
 }
 \value{
-a \link[=arrow__DictionaryType]{arrow::DictionaryType}
+An \link[=arrow__DictionaryType]{arrow::DictionaryType}
 }
 \description{
-dictionary type factory
+Create a dictionary type
+}
+\seealso{
+\link[=data-type]{Other Arrow data types}
 }
diff --git a/r/man/enums.Rd b/r/man/enums.Rd
new file mode 100644
index 0000000..c55170e
--- /dev/null
+++ b/r/man/enums.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/enums.R
+\docType{data}
+\name{enums}
+\alias{enums}
+\alias{TimeUnit}
+\alias{DateUnit}
+\alias{Type}
+\alias{StatusCode}
+\alias{FileMode}
+\alias{MessageType}
+\alias{CompressionType}
+\title{Arrow enums}
+\format{An object of class \code{arrow::TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.}
+\usage{
+TimeUnit
+
+DateUnit
+
+Type
+
+StatusCode
+
+FileMode
+
+MessageType
+
+CompressionType
+}
+\description{
+Arrow enums
+}
+\keyword{datasets}
+\keyword{internal}
diff --git a/r/man/schema.Rd b/r/man/schema.Rd
index ad3bcb1..622e5a7 100644
--- a/r/man/schema.Rd
+++ b/r/man/schema.Rd
@@ -2,16 +2,19 @@
 % Please edit documentation in R/Schema.R
 \name{schema}
 \alias{schema}
-\title{Schema factory}
+\title{Create a schema}
 \usage{
 schema(...)
 }
 \arguments{
-\item{...}{named list of data types}
+\item{...}{named list of \link[=data-type]{data types}}
 }
 \value{
-a \link[=arrow__Schema]{schema}
+A \link[=arrow__Schema]{schema} object.
 }
 \description{
-Schema factory
+This function lets you define a schema for a table. This is useful when you
+want to convert an R \code{data.frame} to Arrow but don't want to rely on the
+default mapping of R types to Arrow types, such as when you want to choose a
+specific numeric precision.
 }
diff --git a/r/man/type.Rd b/r/man/type.Rd
index 3e2b4f4..2f85e4a 100644
--- a/r/man/type.Rd
+++ b/r/man/type.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/R6.R
+% Please edit documentation in R/type.R
 \name{type}
 \alias{type}
 \title{infer the arrow Array type from an R vector}
diff --git a/r/tests/testthat/test-DataType.R b/r/tests/testthat/test-data-type.R
similarity index 100%
rename from r/tests/testthat/test-DataType.R
rename to r/tests/testthat/test-data-type.R
diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R
index aaa2875..5d63a7f 100644
--- a/r/tests/testthat/test-field.R
+++ b/r/tests/testthat/test-field.R
@@ -24,3 +24,7 @@ test_that("field() factory", {
   expect_true(x == x)
   expect_false(x == field("x", int64()))
 })
+
+test_that("Field validation", {
+  expect_error(schema(b = 32), "b must be arrow::DataType, not numeric")
+})
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index ff40b81..387aee9 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -17,6 +17,13 @@
 
 context("arrow::Schema")
 
+test_that("Alternate type names are supported", {
+  expect_equal(
+    schema(b = double(), c = bool(), d = string(), e = float(), f = halffloat()),
+    schema(b = float64(), c = boolean(), d = utf8(), e = float32(), f = float16())
+  )
+})
+
 test_that("reading schema from Buffer", {
   # TODO: this uses the streaming format, i.e. from RecordBatchStreamWriter
   #       maybe there is an easier way to serialize a schema