You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ro...@apache.org on 2021/09/16 09:10:27 UTC

[arrow] 02/02: use static methods in AltrepArrayString

This is an automated email from the ASF dual-hosted git repository.

romainfrancois pushed a commit to branch ARROW_13112_altrep_strings
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 24256d00bcff4329fef65d90fe1a2d1a30551e5c
Author: Romain Francois <ro...@rstudio.com>
AuthorDate: Thu Sep 16 11:07:36 2021 +0200

    use static methods in AltrepArrayString
---
 r/R/arrowExports.R                    |   5 +
 r/src/altrep.cpp                      | 309 +++++++++++++++++++++++++++++++++-
 r/src/arrowExports.cpp                |  17 ++
 r/tests/testthat/test-Array.R         |  58 ++++++-
 r/tests/testthat/test-RecordBatch.R   |  21 ++-
 r/tests/testthat/test-altrep.R        |   9 +
 r/tests/testthat/test-chunked-array.R |  26 ++-
 r/tests/testthat/test-scalar.R        |  18 +-
 8 files changed, 431 insertions(+), 32 deletions(-)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index b852a3d..5913321 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -4,6 +4,10 @@ is_altrep <- function(x) {
   .Call(`_arrow_is_altrep`, x)
 }
 
+test_SET_STRING_ELT <- function(s) {
+  invisible(.Call(`_arrow_test_SET_STRING_ELT`, s))
+}
+
 Array__Slice1 <- function(array, offset) {
   .Call(`_arrow_Array__Slice1`, array, offset)
 }
@@ -1771,3 +1775,4 @@ SetIOThreadPoolCapacity <- function(threads) {
 Array__infer_type <- function(x) {
   .Call(`_arrow_Array__infer_type`, x)
 }
+
diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp
index 804a580..e8859ab 100644
--- a/r/src/altrep.cpp
+++ b/r/src/altrep.cpp
@@ -24,6 +24,7 @@
 #include <arrow/util/bitmap_reader.h>
 
 #include <cpp11/altrep.hpp>
+#include <cpp11/declarations.hpp>
 #if defined(HAS_ALTREP)
 
 #if R_VERSION < R_Version(3, 6, 0)
@@ -279,13 +280,284 @@ struct AltrepArrayPrimitive {
   static SEXP Unserialize(SEXP /* class_ */, SEXP state) { return state; }
 
   SEXP Coerce(int type) {
-    // Just let R handle it for now
-    return NULL;
+    Materialize();
+    return Rf_coerceVector(R_altrep_data2(alt_), type);
   }
 };
 template <int sexp_type>
 R_altrep_class_t AltrepArrayPrimitive<sexp_type>::class_t;
 
+struct AltrepArrayString {
+  static R_altrep_class_t class_t;
+
+  static void DeleteArray(std::shared_ptr<Array>* ptr) { delete ptr; }
+  using Pointer = cpp11::external_pointer<std::shared_ptr<Array>, DeleteArray>;
+
+  static SEXP Make(const std::shared_ptr<Array>& array) {
+    SEXP alt_ = R_new_altrep(class_t, Pointer(new std::shared_ptr<Array>(array)), R_NilValue);
+    MARK_NOT_MUTABLE(alt_);
+    return alt_;
+  }
+
+  static std::shared_ptr<Array> array(SEXP alt_) {
+    SEXP data1_ = R_altrep_data1(alt_);
+    if (Rf_isNull(data1_)) {
+      return nullptr;
+    }
+    return *Pointer(data1_);
+  }
+
+  static bool IsExpanded(SEXP alt_) {
+    return !Rf_isNull(R_altrep_data2(alt_));
+  }
+
+  static bool IsComplete(SEXP alt_) {
+    return Rf_isNull(R_altrep_data1(alt_));
+  }
+
+  static void ReleaseArray(SEXP alt_) {
+    R_set_altrep_data1(alt_, R_NilValue);
+  }
+
+  static SEXP Expand(SEXP alt_) {
+    if (!IsExpanded(alt_)) {
+      auto array_ = array(alt_);
+      R_xlen_t n = array_->length();
+      SEXP data2_ = PROTECT(Rf_allocVector(STRSXP, n));
+      if (n == 0) {
+        ReleaseArray(alt_);
+      } else {
+        // set individual strings to NULL (not yet materialized)
+        memset(STDVEC_DATAPTR(data2_), 0, n * sizeof(SEXP));
+      }
+      R_set_altrep_data2(alt_, data2_);
+      UNPROTECT(1);
+    }
+
+    return R_altrep_data2(alt_);
+  }
+
+  static R_xlen_t Length(SEXP alt_) {
+    return IsExpanded(alt_) ? XLENGTH(R_altrep_data2(alt_)) : array(alt_)->length();
+  }
+
+  static int No_NA(SEXP alt_) {
+    if (!IsExpanded(alt_)) return array(alt_)->null_count() == 0;
+
+    SEXP data2_ = R_altrep_data2(alt_);
+    R_xlen_t n = XLENGTH(data2_);
+    const SEXP* data2_ptr = STRING_PTR_RO(data2_);
+
+    if (IsComplete(alt_)) {
+      for (R_xlen_t i = 0; i < n; i++) {
+        if (data2_ptr[i] == NA_STRING) return false;
+      }
+      return true;
+    } else {
+      auto array_ = array(alt_);
+      for (R_xlen_t i = 0; i < n; i++) {
+        // not yet expanded, but null in the Array
+        if (data2_ptr[i] == NULL && array_->IsNull(i)) return false;
+
+        // already expanded to NULL
+        if (data2_ptr[i] == NA_STRING) return false;
+      }
+      return true;
+    }
+  }
+
+  static int Is_sorted(SEXP alt_) {
+    return UNKNOWN_SORTEDNESS;
+  }
+
+  static SEXP Elt(SEXP alt_, R_xlen_t i) {
+    BEGIN_CPP11
+      if (IsComplete(alt_)) {
+        return STRING_ELT(R_altrep_data2(alt_), i);
+      }
+
+      // make sure data2 is initiated
+      Expand(alt_);
+      SEXP data2_ = R_altrep_data2(alt_);
+
+      // data2[i] was already generated
+      SEXP s = STRING_ELT(data2_, i);
+      if (s != NULL) {
+        return s;
+      }
+
+      // data2[i] is missing
+      auto array_ = array(alt_);
+      if (array_->IsNull(i)) {
+        SET_STRING_ELT(data2_, i, NA_STRING);
+        return NA_STRING;
+      }
+
+      // data2[i] is a string, but we need care about embedded nuls
+      auto view = static_cast<StringArray*>(array_.get())->GetView(i);
+      const bool strip_out_nuls = GetBoolOption("arrow.skip_nul", false);
+      bool nul_was_stripped = false;
+      std::string stripped_string;
+
+      // both cases might jump, although it's less likely when
+      // nuls are stripped, but still we need the unwind protection
+      // so that C++ objects here are correctly destructed, whilst errors
+      // properly pass through to the R side
+      cpp11::unwind_protect([&](){
+        if (strip_out_nuls) {
+          s = r_string_from_view_strip_nul(view, stripped_string, &nul_was_stripped);
+        } else {
+          s = r_string_from_view(view);
+        }
+
+        if (nul_was_stripped) {
+          cpp11::warning("Stripping '\\0' (nul) from character vector");
+        }
+
+      });
+      SET_STRING_ELT(data2_, i, s);
+      return s;
+    END_CPP11
+  }
+
+  static void* Dataptr(SEXP alt_, Rboolean writeable) {
+    return DATAPTR(Complete(alt_));
+  }
+
+  static SEXP Complete(SEXP alt_) {
+    BEGIN_CPP11
+
+      if (!IsComplete(alt_)) {
+        Expand(alt_);
+        auto array_ = array(alt_);
+        SEXP data2_ = R_altrep_data2(alt_);
+        R_xlen_t n = XLENGTH(data2_);
+
+        std::string stripped_string;
+        const bool strip_out_nuls = GetBoolOption("arrow.skip_nul", false);
+        bool nul_was_stripped = false;
+        auto* string_array = static_cast<StringArray*>(array_.get());
+        util::string_view view;
+
+        cpp11::unwind_protect([&](){
+          for (R_xlen_t i = 0; i < n; i++) {
+            SEXP s = STRING_ELT(data2_, i);
+            if (s != NULL) {
+              continue;
+            }
+
+            if (array_->IsNull(i)) {
+              SET_STRING_ELT(data2_, i, NA_STRING);
+              continue;
+            }
+
+            view = string_array->GetView(i);
+            if (strip_out_nuls) {
+              s = r_string_from_view_strip_nul(view, stripped_string, &nul_was_stripped);
+            } else {
+              s = r_string_from_view(view);
+            }
+            SET_STRING_ELT(data2_, i, s);
+          }
+
+          if (nul_was_stripped) {
+            cpp11::warning("Stripping '\\0' (nul) from character vector");
+          }
+
+        });
+
+        ReleaseArray(alt_);
+      }
+      return R_altrep_data2(alt_);
+
+    END_CPP11
+  }
+
+  static const void* Dataptr_or_null(SEXP alt_) {
+    if (IsComplete(alt_)) return DATAPTR(R_altrep_data2(alt_));
+    return NULL;
+  }
+
+  static SEXP Serialized_state(SEXP alt_) {
+    return Complete(alt_);
+  }
+
+  static SEXP Unserialize(SEXP /* class_ */, SEXP state) {
+    return state;
+  }
+
+  static SEXP Duplicate(SEXP alt_, Rboolean /* deep */) {
+    return Rf_lazy_duplicate(Complete(alt_));
+  }
+
+  static Rboolean Inspect(SEXP alt_, int pre, int deep, int pvec,
+                   void (*inspect_subtree)(SEXP, int, int, int)) {
+    if (IsComplete(alt_)) {
+      inspect_subtree(R_altrep_data2(alt_), pre, deep, pvec);
+      return TRUE;
+    }
+
+    const auto& array_ = array(alt_);
+    Rprintf("arrow::Array<%s, %d nulls> len=%d, Array=<%p>\n",
+            array_->type()->ToString().c_str(), array_->null_count(),
+            array_->length(),
+            array_.get());
+    inspect_subtree(R_altrep_data1(alt_), pre, deep + 1, pvec);
+
+    return TRUE;
+  }
+
+  static SEXP Coerce(SEXP alt_, int type) {
+    return Rf_coerceVector(Complete(alt_), type);
+  }
+
+  // static method so that this can error without concerns of
+  // destruction for the
+  static void Set_elt(SEXP alt_, R_xlen_t i, SEXP v) {
+    Rf_error("ALTSTRING objects of type <arrow::array_string_vector> are immutable");
+  }
+
+  static SEXP r_string_from_view_strip_nul(arrow::util::string_view view, std::string& stripped_string,
+                                           bool* nul_was_stripped) {
+    const char* old_string = view.data();
+
+    size_t stripped_len = 0, nul_count = 0;
+
+    for (size_t i = 0; i < view.size(); i++) {
+      if (old_string[i] == '\0') {
+        ++nul_count;
+
+        if (nul_count == 1) {
+          // first nul spotted: allocate stripped string storage
+          stripped_string = view.to_string();
+          stripped_len = i;
+        }
+
+        // don't copy old_string[i] (which is \0) into stripped_string
+        continue;
+      }
+
+      if (nul_count > 0) {
+        stripped_string[stripped_len++] = old_string[i];
+      }
+    }
+
+    if (nul_count > 0) {
+      *nul_was_stripped = true;
+      stripped_string.resize(stripped_len);
+      return r_string_from_view(stripped_string);
+    }
+
+    return r_string_from_view(view);
+  }
+
+  static SEXP r_string_from_view(arrow::util::string_view view) {
+    return Rf_mkCharLenCE(view.data(), view.size(), CE_UTF8);
+  }
+};
+
+R_altrep_class_t AltrepArrayString::class_t;
+
 // The methods below are how R interacts with the altrep objects.
 //
 // They all use the same pattern: create a C++ object of the
@@ -493,10 +765,31 @@ void InitAltIntegerClass(DllInfo* dll, const char* name) {
   InitAltIntegerMethods<AltrepClass>(AltrepClass::class_t, dll);
 }
 
+template <typename AltrepClass>
+void InitAltStringClass(DllInfo* dll, const char* name) {
+  AltrepClass::class_t = R_make_altstring_class(name, "arrow", dll);
+  R_set_altrep_Length_method(AltrepClass::class_t, AltrepClass::Length);
+  R_set_altrep_Inspect_method(AltrepClass::class_t, AltrepClass::Inspect);
+  R_set_altrep_Duplicate_method(AltrepClass::class_t, AltrepClass::Duplicate);
+  R_set_altrep_Serialized_state_method(AltrepClass::class_t, AltrepClass::Serialized_state);
+  R_set_altrep_Unserialize_method(AltrepClass::class_t, AltrepClass::Unserialize);
+  R_set_altrep_Coerce_method(AltrepClass::class_t, AltrepClass::Coerce);
+
+  R_set_altvec_Dataptr_method(AltrepClass::class_t, AltrepClass::Dataptr);
+  R_set_altvec_Dataptr_or_null_method(AltrepClass::class_t, AltrepClass::Dataptr_or_null);
+
+  R_set_altstring_Elt_method(AltrepClass::class_t, AltrepClass::Elt);
+  R_set_altstring_Set_elt_method(AltrepClass::class_t, AltrepClass::Set_elt);
+  R_set_altstring_No_NA_method(AltrepClass::class_t, AltrepClass::No_NA);
+  R_set_altstring_Is_sorted_method(AltrepClass::class_t, AltrepClass::Is_sorted);
+}
+
 // initialize the altrep classes
 void Init_Altrep_classes(DllInfo* dll) {
-  InitAltRealClass<AltrepArrayPrimitive<REALSXP>>(dll, "array_dbl_vector");
-  InitAltIntegerClass<AltrepArrayPrimitive<INTSXP>>(dll, "array_int_vector");
+  InitAltRealClass<AltrepArrayPrimitive<REALSXP>>(dll, "arrow::array_dbl_vector");
+  InitAltIntegerClass<AltrepArrayPrimitive<INTSXP>>(dll, "arrow::array_int_vector");
+
+  InitAltStringClass<AltrepArrayString>(dll, "arrow::array_string_vector");
 }
 
 // return an altrep R vector that shadows the array if possible
@@ -508,6 +801,9 @@ SEXP MakeAltrepArrayPrimitive(const std::shared_ptr<Array>& array) {
     case arrow::Type::INT32:
       return altrep::AltrepArrayPrimitive<INTSXP>(array).alt_;
 
+    case arrow::Type::STRING:
+      return altrep::AltrepArrayString::Make(array);
+
     default:
       break;
   }
@@ -530,4 +826,9 @@ bool is_altrep(SEXP x) {
 #endif
 }
 
+// [[arrow::export]]
+void test_SET_STRING_ELT(SEXP s) {
+  SET_STRING_ELT(s, 0, Rf_mkChar("forbidden"));
+}
+
 #endif
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index f33b81c..99c104f 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -19,6 +19,22 @@ extern "C" SEXP _arrow_is_altrep(SEXP x_sexp){
 }
 #endif
 
+// altrep.cpp
+#if defined(ARROW_R_WITH_ARROW)
+void test_SET_STRING_ELT(SEXP s);
+extern "C" SEXP _arrow_test_SET_STRING_ELT(SEXP s_sexp){
+BEGIN_CPP11
+	arrow::r::Input<SEXP>::type s(s_sexp);
+	test_SET_STRING_ELT(s);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_test_SET_STRING_ELT(SEXP s_sexp){
+	Rf_error("Cannot call test_SET_STRING_ELT(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // array.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::Array> Array__Slice1(const std::shared_ptr<arrow::Array>& array, R_xlen_t offset);
@@ -7053,6 +7069,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_s3_available", (DL_FUNC)& _s3_available, 0 },
 		{ "_json_available", (DL_FUNC)& _json_available, 0 },
 		{ "_arrow_is_altrep", (DL_FUNC) &_arrow_is_altrep, 1}, 
+		{ "_arrow_test_SET_STRING_ELT", (DL_FUNC) &_arrow_test_SET_STRING_ELT, 1}, 
 		{ "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, 
 		{ "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, 
 		{ "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, 
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index a2fd7bf..d62e4dd 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -730,24 +730,66 @@ test_that("Handling string data with embedded nuls", {
     fixed = TRUE
   )
   array_with_nul <- Array$create(raws)$cast(utf8())
-  expect_error(
-    as.vector(array_with_nul),
-    paste0(
-      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow ",
-      "to R, set options(arrow.skip_nul = TRUE)"
-    ),
+
+  # no error on conversion, because altrep laziness
+  v <- expect_error(as.vector(array_with_nul), NA)
+
+  # attempting materialization -> error
+
+  # TODO: this happens internally in ALTREP, and there we can't catch the error and
+  #       promote it
+  #
+  # expect_error(v[],
+  #   paste0(
+  #     "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow ",
+  #     "to R, set options(arrow.skip_nul = TRUE)"
+  #   ),
+  #   fixed = TRUE
+  # )
+  expect_error(v[],
+    "embedded nul in string: 'ma\\0n'",
+    fixed = TRUE
+  )
+
+  # also error on materializing v[3]
+  # expect_error(v[3],
+  #   paste0(
+  #    "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow ",
+  #    "to R, set options(arrow.skip_nul = TRUE)"
+  #   ),
+  #   fixed = TRUE
+  # )
+  expect_error(v[3],
+    "embedded nul in string",
     fixed = TRUE
   )
 
   withr::with_options(list(arrow.skip_nul = TRUE), {
+    # no warning yet because altrep laziness
+    v <- as.vector(array_with_nul)
+
     expect_warning(
-      expect_identical(
-        as.vector(array_with_nul),
+      expect_identical(v[],
         c("person", "woman", "man", "fan", "camera", "tv")
       ),
       "Stripping '\\0' (nul) from character vector",
       fixed = TRUE
     )
+
+    v <- as.vector(array_with_nul)
+    expect_warning(
+      expect_identical(v[3], "man"),
+      "Stripping '\\0' (nul) from character vector",
+      fixed = TRUE
+    )
+
+    v <- as.vector(array_with_nul)
+    expect_warning(
+      expect_identical(v[4], "fan"),
+      "Stripping '\\0' (nul) from character vector",
+      fixed = TRUE
+    )
+
   })
 })
 
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index dc327c0..ba250dd 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -515,15 +515,26 @@ test_that("Handling string data with embedded nuls", {
   )
   batch_with_nul <- record_batch(a = 1:5, b = raws)
   batch_with_nul$b <- batch_with_nul$b$cast(utf8())
+
+  df <- as.data.frame(batch_with_nul)
+
+  # expect_error(
+  #   df[],
+  #   paste0(
+  #     "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+  #     "set options(arrow.skip_nul = TRUE)"
+  #   ),
+  #   fixed = TRUE
+  # )
   expect_error(
-    as.data.frame(batch_with_nul),
-    paste0(
-      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
-      "set options(arrow.skip_nul = TRUE)"
-    ),
+    df$b[],
+    "embedded nul in string: 'ma\\0n'",
     fixed = TRUE
   )
 
+  batch_with_nul <- record_batch(a = 1:5, b = raws)
+  batch_with_nul$b <- batch_with_nul$b$cast(utf8())
+
   withr::with_options(list(arrow.skip_nul = TRUE), {
     expect_warning(
       expect_equivalent(
diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R
index 8cb989b..c1cd170 100644
--- a/r/tests/testthat/test-altrep.R
+++ b/r/tests/testthat/test-altrep.R
@@ -193,15 +193,24 @@ test_that("altrep min/max/sum identical to R versions for int", {
 test_that("altrep vectors handle serialization", {
   ints <- c(1L, 2L, NA_integer_)
   dbls <- c(1, 2, NA_real_)
+  strs <- c("un", "deux" , NA_character_)
 
   expect_identical(ints, unserialize(serialize(Array$create(ints)$as_vector(), NULL)))
   expect_identical(dbls, unserialize(serialize(Array$create(dbls)$as_vector(), NULL)))
+  expect_identical(strs, unserialize(serialize(Array$create(strs)$as_vector(), NULL)))
 })
 
 test_that("altrep vectors handle coercion", {
   ints <- c(1L, 2L, NA_integer_)
   dbls <- c(1, 2, NA_real_)
+  strs <- c("1", "2" , NA_character_)
 
   expect_identical(ints, as.integer(Array$create(dbls)$as_vector()))
+  expect_identical(ints, as.integer(Array$create(strs)$as_vector()))
+
   expect_identical(dbls, as.numeric(Array$create(ints)$as_vector()))
+  expect_identical(dbls, as.numeric(Array$create(strs)$as_vector()))
+
+  expect_identical(strs, as.character(Array$create(ints)$as_vector()))
+  expect_identical(strs, as.character(Array$create(dbls)$as_vector()))
 })
diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R
index 8ec8952..07a6059 100644
--- a/r/tests/testthat/test-chunked-array.R
+++ b/r/tests/testthat/test-chunked-array.R
@@ -430,21 +430,29 @@ test_that("Handling string data with embedded nuls", {
   class = c("arrow_binary", "vctrs_vctr", "list")
   )
   chunked_array_with_nul <- ChunkedArray$create(raws)$cast(utf8())
+  v <- expect_error(as.vector(chunked_array_with_nul), NA)
+
+  # TODO: when we figure out how to promote the error internally in altrep
+  #
+  # expect_error(
+  #   v[],
+  #   paste0(
+  #     "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+  #     "set options(arrow.skip_nul = TRUE)"
+  #   ),
+  #   fixed = TRUE
+  # )
+
   expect_error(
-    as.vector(chunked_array_with_nul),
-    paste0(
-      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
-      "set options(arrow.skip_nul = TRUE)"
-    ),
+    v[3],
+    "embedded nul in string: 'ma\\0n'",
     fixed = TRUE
   )
 
   withr::with_options(list(arrow.skip_nul = TRUE), {
+    v <- expect_warning(as.vector(chunked_array_with_nul), NA)
     expect_warning(
-      expect_identical(
-        as.vector(chunked_array_with_nul),
-        c("person", "woman", "man", "fan", "camera", "tv")
-      ),
+      expect_identical(v[3], "man"),
       "Stripping '\\0' (nul) from character vector",
       fixed = TRUE
     )
diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R
index 566228c..8b6e5b6 100644
--- a/r/tests/testthat/test-scalar.R
+++ b/r/tests/testthat/test-scalar.R
@@ -85,19 +85,25 @@ test_that("Handling string data with embedded nuls", {
     fixed = TRUE
   )
   scalar_with_nul <- Scalar$create(raws, binary())$cast(utf8())
+
+  v <- expect_error(as.vector(scalar_with_nul), NA)
+  # expect_error(
+  #   v[1],
+  #   paste0(
+  #     "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+  #     "set options(arrow.skip_nul = TRUE)"
+  #   ),
+  #   fixed = TRUE
+  # )
   expect_error(
-    as.vector(scalar_with_nul),
-    paste0(
-      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
-      "set options(arrow.skip_nul = TRUE)"
-    ),
+    v[], "embedded nul in string: 'ma\\0n'",
     fixed = TRUE
   )
 
   withr::with_options(list(arrow.skip_nul = TRUE), {
     expect_warning(
       expect_identical(
-        as.vector(scalar_with_nul),
+        as.vector(scalar_with_nul)[],
         "man"
       ),
       "Stripping '\\0' (nul) from character vector",