You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ro...@apache.org on 2021/09/16 09:10:25 UTC

[arrow] branch ARROW_13112_altrep_strings created (now 24256d0)

This is an automated email from the ASF dual-hosted git repository.

romainfrancois pushed a change to branch ARROW_13112_altrep_strings
in repository https://gitbox.apache.org/repos/asf/arrow.git.


      at 24256d0  use static methods in AltrepArrayString

This branch includes the following new commits:

     new 53cc439  unused
     new 24256d0  use static methods in AltrepArrayString

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[arrow] 01/02: unused

Posted by ro...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

romainfrancois pushed a commit to branch ARROW_13112_altrep_strings
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 53cc439d1d571501ad24fca42ad8e9343a4e2c59
Author: Romain Francois <ro...@rstudio.com>
AuthorDate: Thu Sep 9 09:19:01 2021 +0200

    unused
---
 r/src/altrep.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp
index ec68ade..804a580 100644
--- a/r/src/altrep.cpp
+++ b/r/src/altrep.cpp
@@ -26,9 +26,6 @@
 #include <cpp11/altrep.hpp>
 #if defined(HAS_ALTREP)
 
-// defined in array_to_vector.cpp
-SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array);
-
 #if R_VERSION < R_Version(3, 6, 0)
 
 // workaround because R's <R_ext/Altrep.h> not so conveniently uses `class`

[arrow] 02/02: use static methods in AltrepArrayString

Posted by ro...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

romainfrancois pushed a commit to branch ARROW_13112_altrep_strings
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 24256d00bcff4329fef65d90fe1a2d1a30551e5c
Author: Romain Francois <ro...@rstudio.com>
AuthorDate: Thu Sep 16 11:07:36 2021 +0200

    use static methods in AltrepArrayString
---
 r/R/arrowExports.R                    |   5 +
 r/src/altrep.cpp                      | 309 +++++++++++++++++++++++++++++++++-
 r/src/arrowExports.cpp                |  17 ++
 r/tests/testthat/test-Array.R         |  58 ++++++-
 r/tests/testthat/test-RecordBatch.R   |  21 ++-
 r/tests/testthat/test-altrep.R        |   9 +
 r/tests/testthat/test-chunked-array.R |  26 ++-
 r/tests/testthat/test-scalar.R        |  18 +-
 8 files changed, 431 insertions(+), 32 deletions(-)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index b852a3d..5913321 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -4,6 +4,10 @@ is_altrep <- function(x) {
   .Call(`_arrow_is_altrep`, x)
 }
 
+test_SET_STRING_ELT <- function(s) {
+  invisible(.Call(`_arrow_test_SET_STRING_ELT`, s))
+}
+
 Array__Slice1 <- function(array, offset) {
   .Call(`_arrow_Array__Slice1`, array, offset)
 }
@@ -1771,3 +1775,4 @@ SetIOThreadPoolCapacity <- function(threads) {
 Array__infer_type <- function(x) {
   .Call(`_arrow_Array__infer_type`, x)
 }
+
diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp
index 804a580..e8859ab 100644
--- a/r/src/altrep.cpp
+++ b/r/src/altrep.cpp
@@ -24,6 +24,7 @@
 #include <arrow/util/bitmap_reader.h>
 
 #include <cpp11/altrep.hpp>
+#include <cpp11/declarations.hpp>
 #if defined(HAS_ALTREP)
 
 #if R_VERSION < R_Version(3, 6, 0)
@@ -279,13 +280,284 @@ struct AltrepArrayPrimitive {
   static SEXP Unserialize(SEXP /* class_ */, SEXP state) { return state; }
 
   SEXP Coerce(int type) {
-    // Just let R handle it for now
-    return NULL;
+    Materialize();
+    return Rf_coerceVector(R_altrep_data2(alt_), type);
   }
 };
 template <int sexp_type>
 R_altrep_class_t AltrepArrayPrimitive<sexp_type>::class_t;
 
+struct AltrepArrayString {
+  static R_altrep_class_t class_t;
+
+  static void DeleteArray(std::shared_ptr<Array>* ptr) { delete ptr; }
+  using Pointer = cpp11::external_pointer<std::shared_ptr<Array>, DeleteArray>;
+
+  static SEXP Make(const std::shared_ptr<Array>& array) {
+    SEXP alt_ = R_new_altrep(class_t, Pointer(new std::shared_ptr<Array>(array)), R_NilValue);
+    MARK_NOT_MUTABLE(alt_);
+    return alt_;
+  }
+
+  static std::shared_ptr<Array> array(SEXP alt_) {
+    SEXP data1_ = R_altrep_data1(alt_);
+    if (Rf_isNull(data1_)) {
+      return nullptr;
+    }
+    return *Pointer(data1_);
+  }
+
+  static bool IsExpanded(SEXP alt_) {
+    return !Rf_isNull(R_altrep_data2(alt_));
+  }
+
+  static bool IsComplete(SEXP alt_) {
+    return Rf_isNull(R_altrep_data1(alt_));
+  }
+
+  static void ReleaseArray(SEXP alt_) {
+    R_set_altrep_data1(alt_, R_NilValue);
+  }
+
+  static SEXP Expand(SEXP alt_) {
+    if (!IsExpanded(alt_)) {
+      auto array_ = array(alt_);
+      R_xlen_t n = array_->length();
+      SEXP data2_ = PROTECT(Rf_allocVector(STRSXP, n));
+      if (n == 0) {
+        ReleaseArray(alt_);
+      } else {
+        // set individual strings to NULL (not yet materialized)
+        memset(STDVEC_DATAPTR(data2_), 0, n * sizeof(SEXP));
+      }
+      R_set_altrep_data2(alt_, data2_);
+      UNPROTECT(1);
+    }
+
+    return R_altrep_data2(alt_);
+  }
+
+  static R_xlen_t Length(SEXP alt_) {
+    return IsExpanded(alt_) ? XLENGTH(R_altrep_data2(alt_)) : array(alt_)->length();
+  }
+
+  static int No_NA(SEXP alt_) {
+    if (!IsExpanded(alt_)) return array(alt_)->null_count() == 0;
+
+    SEXP data2_ = R_altrep_data2(alt_);
+    R_xlen_t n = XLENGTH(data2_);
+    const SEXP* data2_ptr = STRING_PTR_RO(data2_);
+
+    if (IsComplete(alt_)) {
+      for (R_xlen_t i = 0; i < n; i++) {
+        if (data2_ptr[i] == NA_STRING) return false;
+      }
+      return true;
+    } else {
+      auto array_ = array(alt_);
+      for (R_xlen_t i = 0; i < n; i++) {
+        // not yet expanded, but null in the Array
+        if (data2_ptr[i] == NULL && array_->IsNull(i)) return false;
+
+        // already expanded to NULL
+        if (data2_ptr[i] == NA_STRING) return false;
+      }
+      return true;
+    }
+  }
+
+  static int Is_sorted(SEXP alt_) {
+    return UNKNOWN_SORTEDNESS;
+  }
+
+  static SEXP Elt(SEXP alt_, R_xlen_t i) {
+    BEGIN_CPP11
+      if (IsComplete(alt_)) {
+        return STRING_ELT(R_altrep_data2(alt_), i);
+      }
+
+      // make sure data2 is initiated
+      Expand(alt_);
+      SEXP data2_ = R_altrep_data2(alt_);
+
+      // data2[i] was already generated
+      SEXP s = STRING_ELT(data2_, i);
+      if (s != NULL) {
+        return s;
+      }
+
+      // data2[i] is missing
+      auto array_ = array(alt_);
+      if (array_->IsNull(i)) {
+        SET_STRING_ELT(data2_, i, NA_STRING);
+        return NA_STRING;
+      }
+
+      // data2[i] is a string, but we need care about embedded nuls
+      auto view = static_cast<StringArray*>(array_.get())->GetView(i);
+      const bool strip_out_nuls = GetBoolOption("arrow.skip_nul", false);
+      bool nul_was_stripped = false;
+      std::string stripped_string;
+
+      // both cases might jump, although it's less likely when
+      // nuls are stripped, but still we need the unwind protection
+      // so that C++ objects here are correctly destructed, whilst errors
+      // properly pass through to the R side
+      cpp11::unwind_protect([&](){
+        if (strip_out_nuls) {
+          s = r_string_from_view_strip_nul(view, stripped_string, &nul_was_stripped);
+        } else {
+          s = r_string_from_view(view);
+        }
+
+        if (nul_was_stripped) {
+          cpp11::warning("Stripping '\\0' (nul) from character vector");
+        }
+
+      });
+      SET_STRING_ELT(data2_, i, s);
+      return s;
+    END_CPP11
+  }
+
+  static void* Dataptr(SEXP alt_, Rboolean writeable) {
+    return DATAPTR(Complete(alt_));
+  }
+
+  static SEXP Complete(SEXP alt_) {
+    BEGIN_CPP11
+
+      if (!IsComplete(alt_)) {
+        Expand(alt_);
+        auto array_ = array(alt_);
+        SEXP data2_ = R_altrep_data2(alt_);
+        R_xlen_t n = XLENGTH(data2_);
+
+        std::string stripped_string;
+        const bool strip_out_nuls = GetBoolOption("arrow.skip_nul", false);
+        bool nul_was_stripped = false;
+        auto* string_array = static_cast<StringArray*>(array_.get());
+        util::string_view view;
+
+        cpp11::unwind_protect([&](){
+          for (R_xlen_t i = 0; i < n; i++) {
+            SEXP s = STRING_ELT(data2_, i);
+            if (s != NULL) {
+              continue;
+            }
+
+            if (array_->IsNull(i)) {
+              SET_STRING_ELT(data2_, i, NA_STRING);
+              continue;
+            }
+
+            view = string_array->GetView(i);
+            if (strip_out_nuls) {
+              s = r_string_from_view_strip_nul(view, stripped_string, &nul_was_stripped);
+            } else {
+              s = r_string_from_view(view);
+            }
+            SET_STRING_ELT(data2_, i, s);
+          }
+
+          if (nul_was_stripped) {
+            cpp11::warning("Stripping '\\0' (nul) from character vector");
+          }
+
+        });
+
+        ReleaseArray(alt_);
+      }
+      return R_altrep_data2(alt_);
+
+    END_CPP11
+  }
+
+  static const void* Dataptr_or_null(SEXP alt_) {
+    if (IsComplete(alt_)) return DATAPTR(R_altrep_data2(alt_));
+    return NULL;
+  }
+
+  static SEXP Serialized_state(SEXP alt_) {
+    return Complete(alt_);
+  }
+
+  static SEXP Unserialize(SEXP /* class_ */, SEXP state) {
+    return state;
+  }
+
+  static SEXP Duplicate(SEXP alt_, Rboolean /* deep */) {
+    return Rf_lazy_duplicate(Complete(alt_));
+  }
+
+  static Rboolean Inspect(SEXP alt_, int pre, int deep, int pvec,
+                   void (*inspect_subtree)(SEXP, int, int, int)) {
+    if (IsComplete(alt_)) {
+      inspect_subtree(R_altrep_data2(alt_), pre, deep, pvec);
+      return TRUE;
+    }
+
+    const auto& array_ = array(alt_);
+    Rprintf("arrow::Array<%s, %d nulls> len=%d, Array=<%p>\n",
+            array_->type()->ToString().c_str(), array_->null_count(),
+            array_->length(),
+            array_.get());
+    inspect_subtree(R_altrep_data1(alt_), pre, deep + 1, pvec);
+
+    return TRUE;
+  }
+
+  static SEXP Coerce(SEXP alt_, int type) {
+    return Rf_coerceVector(Complete(alt_), type);
+  }
+
+  // static method so that this can error without concerns of
+  // destruction for the
+  static void Set_elt(SEXP alt_, R_xlen_t i, SEXP v) {
+    Rf_error("ALTSTRING objects of type <arrow::array_string_vector> are immutable");
+  }
+
+  static SEXP r_string_from_view_strip_nul(arrow::util::string_view view, std::string& stripped_string,
+                                           bool* nul_was_stripped) {
+    const char* old_string = view.data();
+
+    size_t stripped_len = 0, nul_count = 0;
+
+    for (size_t i = 0; i < view.size(); i++) {
+      if (old_string[i] == '\0') {
+        ++nul_count;
+
+        if (nul_count == 1) {
+          // first nul spotted: allocate stripped string storage
+          stripped_string = view.to_string();
+          stripped_len = i;
+        }
+
+        // don't copy old_string[i] (which is \0) into stripped_string
+        continue;
+      }
+
+      if (nul_count > 0) {
+        stripped_string[stripped_len++] = old_string[i];
+      }
+    }
+
+    if (nul_count > 0) {
+      *nul_was_stripped = true;
+      stripped_string.resize(stripped_len);
+      return r_string_from_view(stripped_string);
+    }
+
+    return r_string_from_view(view);
+  }
+
+  static SEXP r_string_from_view(arrow::util::string_view view) {
+    return Rf_mkCharLenCE(view.data(), view.size(), CE_UTF8);
+  }
+};
+
+R_altrep_class_t AltrepArrayString::class_t;
+
 // The methods below are how R interacts with the altrep objects.
 //
 // They all use the same pattern: create a C++ object of the
@@ -493,10 +765,31 @@ void InitAltIntegerClass(DllInfo* dll, const char* name) {
   InitAltIntegerMethods<AltrepClass>(AltrepClass::class_t, dll);
 }
 
+template <typename AltrepClass>
+void InitAltStringClass(DllInfo* dll, const char* name) {
+  AltrepClass::class_t = R_make_altstring_class(name, "arrow", dll);
+  R_set_altrep_Length_method(AltrepClass::class_t, AltrepClass::Length);
+  R_set_altrep_Inspect_method(AltrepClass::class_t, AltrepClass::Inspect);
+  R_set_altrep_Duplicate_method(AltrepClass::class_t, AltrepClass::Duplicate);
+  R_set_altrep_Serialized_state_method(AltrepClass::class_t, AltrepClass::Serialized_state);
+  R_set_altrep_Unserialize_method(AltrepClass::class_t, AltrepClass::Unserialize);
+  R_set_altrep_Coerce_method(AltrepClass::class_t, AltrepClass::Coerce);
+
+  R_set_altvec_Dataptr_method(AltrepClass::class_t, AltrepClass::Dataptr);
+  R_set_altvec_Dataptr_or_null_method(AltrepClass::class_t, AltrepClass::Dataptr_or_null);
+
+  R_set_altstring_Elt_method(AltrepClass::class_t, AltrepClass::Elt);
+  R_set_altstring_Set_elt_method(AltrepClass::class_t, AltrepClass::Set_elt);
+  R_set_altstring_No_NA_method(AltrepClass::class_t, AltrepClass::No_NA);
+  R_set_altstring_Is_sorted_method(AltrepClass::class_t, AltrepClass::Is_sorted);
+}
+
 // initialize the altrep classes
 void Init_Altrep_classes(DllInfo* dll) {
-  InitAltRealClass<AltrepArrayPrimitive<REALSXP>>(dll, "array_dbl_vector");
-  InitAltIntegerClass<AltrepArrayPrimitive<INTSXP>>(dll, "array_int_vector");
+  InitAltRealClass<AltrepArrayPrimitive<REALSXP>>(dll, "arrow::array_dbl_vector");
+  InitAltIntegerClass<AltrepArrayPrimitive<INTSXP>>(dll, "arrow::array_int_vector");
+
+  InitAltStringClass<AltrepArrayString>(dll, "arrow::array_string_vector");
 }
 
 // return an altrep R vector that shadows the array if possible
@@ -508,6 +801,9 @@ SEXP MakeAltrepArrayPrimitive(const std::shared_ptr<Array>& array) {
     case arrow::Type::INT32:
       return altrep::AltrepArrayPrimitive<INTSXP>(array).alt_;
 
+    case arrow::Type::STRING:
+      return altrep::AltrepArrayString::Make(array);
+
     default:
       break;
   }
@@ -530,4 +826,9 @@ bool is_altrep(SEXP x) {
 #endif
 }
 
+// [[arrow::export]]
+void test_SET_STRING_ELT(SEXP s) {
+  SET_STRING_ELT(s, 0, Rf_mkChar("forbidden"));
+}
+
 #endif
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index f33b81c..99c104f 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -19,6 +19,22 @@ extern "C" SEXP _arrow_is_altrep(SEXP x_sexp){
 }
 #endif
 
+// altrep.cpp
+#if defined(ARROW_R_WITH_ARROW)
+void test_SET_STRING_ELT(SEXP s);
+extern "C" SEXP _arrow_test_SET_STRING_ELT(SEXP s_sexp){
+BEGIN_CPP11
+	arrow::r::Input<SEXP>::type s(s_sexp);
+	test_SET_STRING_ELT(s);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_test_SET_STRING_ELT(SEXP s_sexp){
+	Rf_error("Cannot call test_SET_STRING_ELT(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // array.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::Array> Array__Slice1(const std::shared_ptr<arrow::Array>& array, R_xlen_t offset);
@@ -7053,6 +7069,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_s3_available", (DL_FUNC)& _s3_available, 0 },
 		{ "_json_available", (DL_FUNC)& _json_available, 0 },
 		{ "_arrow_is_altrep", (DL_FUNC) &_arrow_is_altrep, 1}, 
+		{ "_arrow_test_SET_STRING_ELT", (DL_FUNC) &_arrow_test_SET_STRING_ELT, 1}, 
 		{ "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, 
 		{ "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, 
 		{ "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, 
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index a2fd7bf..d62e4dd 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -730,24 +730,66 @@ test_that("Handling string data with embedded nuls", {
     fixed = TRUE
   )
   array_with_nul <- Array$create(raws)$cast(utf8())
-  expect_error(
-    as.vector(array_with_nul),
-    paste0(
-      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow ",
-      "to R, set options(arrow.skip_nul = TRUE)"
-    ),
+
+  # no error on conversion, because altrep laziness
+  v <- expect_error(as.vector(array_with_nul), NA)
+
+  # attempting materialization -> error
+
+  # TODO: this happens internally in ALTREP, and there we can't catch the error and
+  #       promote it
+  #
+  # expect_error(v[],
+  #   paste0(
+  #     "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow ",
+  #     "to R, set options(arrow.skip_nul = TRUE)"
+  #   ),
+  #   fixed = TRUE
+  # )
+  expect_error(v[],
+    "embedded nul in string: 'ma\\0n'",
+    fixed = TRUE
+  )
+
+  # also error on materializing v[3]
+  # expect_error(v[3],
+  #   paste0(
+  #    "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow ",
+  #    "to R, set options(arrow.skip_nul = TRUE)"
+  #   ),
+  #   fixed = TRUE
+  # )
+  expect_error(v[3],
+    "embedded nul in string",
     fixed = TRUE
   )
 
   withr::with_options(list(arrow.skip_nul = TRUE), {
+    # no warning yet because altrep laziness
+    v <- as.vector(array_with_nul)
+
     expect_warning(
-      expect_identical(
-        as.vector(array_with_nul),
+      expect_identical(v[],
         c("person", "woman", "man", "fan", "camera", "tv")
       ),
       "Stripping '\\0' (nul) from character vector",
       fixed = TRUE
     )
+
+    v <- as.vector(array_with_nul)
+    expect_warning(
+      expect_identical(v[3], "man"),
+      "Stripping '\\0' (nul) from character vector",
+      fixed = TRUE
+    )
+
+    v <- as.vector(array_with_nul)
+    expect_warning(
+      expect_identical(v[4], "fan"),
+      "Stripping '\\0' (nul) from character vector",
+      fixed = TRUE
+    )
+
   })
 })
 
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index dc327c0..ba250dd 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -515,15 +515,26 @@ test_that("Handling string data with embedded nuls", {
   )
   batch_with_nul <- record_batch(a = 1:5, b = raws)
   batch_with_nul$b <- batch_with_nul$b$cast(utf8())
+
+  df <- as.data.frame(batch_with_nul)
+
+  # expect_error(
+  #   df[],
+  #   paste0(
+  #     "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+  #     "set options(arrow.skip_nul = TRUE)"
+  #   ),
+  #   fixed = TRUE
+  # )
   expect_error(
-    as.data.frame(batch_with_nul),
-    paste0(
-      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
-      "set options(arrow.skip_nul = TRUE)"
-    ),
+    df$b[],
+    "embedded nul in string: 'ma\\0n'",
     fixed = TRUE
   )
 
+  batch_with_nul <- record_batch(a = 1:5, b = raws)
+  batch_with_nul$b <- batch_with_nul$b$cast(utf8())
+
   withr::with_options(list(arrow.skip_nul = TRUE), {
     expect_warning(
       expect_equivalent(
diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R
index 8cb989b..c1cd170 100644
--- a/r/tests/testthat/test-altrep.R
+++ b/r/tests/testthat/test-altrep.R
@@ -193,15 +193,24 @@ test_that("altrep min/max/sum identical to R versions for int", {
 test_that("altrep vectors handle serialization", {
   ints <- c(1L, 2L, NA_integer_)
   dbls <- c(1, 2, NA_real_)
+  strs <- c("un", "deux" , NA_character_)
 
   expect_identical(ints, unserialize(serialize(Array$create(ints)$as_vector(), NULL)))
   expect_identical(dbls, unserialize(serialize(Array$create(dbls)$as_vector(), NULL)))
+  expect_identical(strs, unserialize(serialize(Array$create(strs)$as_vector(), NULL)))
 })
 
 test_that("altrep vectors handle coercion", {
   ints <- c(1L, 2L, NA_integer_)
   dbls <- c(1, 2, NA_real_)
+  strs <- c("1", "2" , NA_character_)
 
   expect_identical(ints, as.integer(Array$create(dbls)$as_vector()))
+  expect_identical(ints, as.integer(Array$create(strs)$as_vector()))
+
   expect_identical(dbls, as.numeric(Array$create(ints)$as_vector()))
+  expect_identical(dbls, as.numeric(Array$create(strs)$as_vector()))
+
+  expect_identical(strs, as.character(Array$create(ints)$as_vector()))
+  expect_identical(strs, as.character(Array$create(dbls)$as_vector()))
 })
diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R
index 8ec8952..07a6059 100644
--- a/r/tests/testthat/test-chunked-array.R
+++ b/r/tests/testthat/test-chunked-array.R
@@ -430,21 +430,29 @@ test_that("Handling string data with embedded nuls", {
   class = c("arrow_binary", "vctrs_vctr", "list")
   )
   chunked_array_with_nul <- ChunkedArray$create(raws)$cast(utf8())
+  v <- expect_error(as.vector(chunked_array_with_nul), NA)
+
+  # TODO: when we figure out how to promote the error internally in altrep
+  #
+  # expect_error(
+  #   v[],
+  #   paste0(
+  #     "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+  #     "set options(arrow.skip_nul = TRUE)"
+  #   ),
+  #   fixed = TRUE
+  # )
+
   expect_error(
-    as.vector(chunked_array_with_nul),
-    paste0(
-      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
-      "set options(arrow.skip_nul = TRUE)"
-    ),
+    v[3],
+    "embedded nul in string: 'ma\\0n'",
     fixed = TRUE
   )
 
   withr::with_options(list(arrow.skip_nul = TRUE), {
+    v <- expect_warning(as.vector(chunked_array_with_nul), NA)
     expect_warning(
-      expect_identical(
-        as.vector(chunked_array_with_nul),
-        c("person", "woman", "man", "fan", "camera", "tv")
-      ),
+      expect_identical(v[3], "man"),
       "Stripping '\\0' (nul) from character vector",
       fixed = TRUE
     )
diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R
index 566228c..8b6e5b6 100644
--- a/r/tests/testthat/test-scalar.R
+++ b/r/tests/testthat/test-scalar.R
@@ -85,19 +85,25 @@ test_that("Handling string data with embedded nuls", {
     fixed = TRUE
   )
   scalar_with_nul <- Scalar$create(raws, binary())$cast(utf8())
+
+  v <- expect_error(as.vector(scalar_with_nul), NA)
+  # expect_error(
+  #   v[1],
+  #   paste0(
+  #     "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+  #     "set options(arrow.skip_nul = TRUE)"
+  #   ),
+  #   fixed = TRUE
+  # )
   expect_error(
-    as.vector(scalar_with_nul),
-    paste0(
-      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
-      "set options(arrow.skip_nul = TRUE)"
-    ),
+    v[], "embedded nul in string: 'ma\\0n'",
     fixed = TRUE
   )
 
   withr::with_options(list(arrow.skip_nul = TRUE), {
     expect_warning(
       expect_identical(
-        as.vector(scalar_with_nul),
+        as.vector(scalar_with_nul)[],
         "man"
       ),
       "Stripping '\\0' (nul) from character vector",