You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/08/07 17:58:52 UTC

[GitHub] [arrow] bkietz commented on a change in pull request #7819: ARROW-9405: [R] Switch to cpp11

bkietz commented on a change in pull request #7819:
URL: https://github.com/apache/arrow/pull/7819#discussion_r467084647



##########
File path: r/src/array_from_vector.cpp
##########
@@ -406,9 +403,12 @@ std::shared_ptr<Array> MakeFactorArray(Rcpp::IntegerVector_ factor,
     case Type::INT64:
       return MakeFactorArrayImpl<arrow::Int64Type>(factor, type);
     default:
-      Rcpp::stop(tfm::format("Cannot convert to dictionary with index_type %s",
-                             dict_type.index_type()->ToString()));
+      break;
   }
+
+  cpp11::stop("Cannot convert to dictionary with index_type '%s'",
+              dict_type.index_type()->ToString().c_str());
+  return nullptr;

Review comment:
       There *should* be no need to return here, since `cpp11::stop()` is marked `[[noreturn]]`.

##########
File path: r/src/array_from_vector.cpp
##########
@@ -1064,42 +1063,42 @@ class FixedSizeBinaryVectorConverter : public VectorConverter {
   FixedSizeBinaryBuilder* typed_builder_;
 };
 
-template <typename Builder>
+template <typename StringBuilder>
 class StringVectorConverter : public VectorConverter {
  public:
   ~StringVectorConverter() {}
 
   Status Init(ArrayBuilder* builder) {
-    typed_builder_ = checked_cast<Builder*>(builder);
+    typed_builder_ = checked_cast<StringBuilder*>(builder);
     return Status::OK();
   }
 
   Status Ingest(SEXP obj) {
     ARROW_RETURN_IF(TYPEOF(obj) != STRSXP,
                     Status::RError("Expecting a character vector"));
-    R_xlen_t n = XLENGTH(obj);
 
-    // Reserve enough space before appending
-    int64_t size = 0;
-    for (R_xlen_t i = 0; i < n; i++) {
-      SEXP string_i = STRING_ELT(obj, i);
-      if (string_i != NA_STRING) {
-        size += XLENGTH(Rf_mkCharCE(Rf_translateCharUTF8(string_i), CE_UTF8));
-      }
+    cpp11::strings s(obj);
+    RETURN_NOT_OK(typed_builder_->Reserve(s.size()));
+
+    // note: the total length is calculated without utf8
+    //       conversion, so see this more as a hint rather than
+    //       the actual total length
+    auto total_length_hint = 0;

Review comment:
       ```suggestion
       int64_t total_length_hint = 0;
   ```

##########
File path: r/src/arrow_cpp11.h
##########
@@ -0,0 +1,243 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+#undef Free
+
+namespace cpp11 {
+
+template <typename T>
+SEXP as_sexp(const std::shared_ptr<T>& ptr);
+
+template <typename T>
+SEXP as_sexp(const std::vector<std::shared_ptr<T>>& vec);
+
+template <typename E, typename std::enable_if<std::is_enum<E>::value>::type* = nullptr>
+SEXP as_sexp(E e);
+
+}  // namespace cpp11
+
+#include <cpp11.hpp>

Review comment:
       Please keep all includes at the top of the file.

##########
File path: r/src/arrow_cpp11.h
##########
@@ -0,0 +1,243 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+#undef Free
+
+namespace cpp11 {
+
+template <typename T>
+SEXP as_sexp(const std::shared_ptr<T>& ptr);
+
+template <typename T>
+SEXP as_sexp(const std::vector<std::shared_ptr<T>>& vec);
+
+template <typename E, typename std::enable_if<std::is_enum<E>::value>::type* = nullptr>
+SEXP as_sexp(E e);
+
+}  // namespace cpp11
+
+#include <cpp11.hpp>
+
+namespace arrow {
+namespace r {
+struct symbols {
+  static SEXP units;
+  static SEXP tzone;
+  static SEXP xp;
+  static SEXP dot_Internal;
+  static SEXP inspect;
+  static SEXP row_names;
+  static SEXP serialize_arrow_r_metadata;
+  static SEXP as_list;
+  static SEXP ptype;
+  static SEXP byte_width;
+  static SEXP list_size;
+};
+
+struct data {
+  static SEXP classes_POSIXct;
+  static SEXP classes_metadata_r;
+  static SEXP classes_vctrs_list_of;
+  static SEXP classes_tbl_df;
+
+  static SEXP classes_arrow_binary;
+  static SEXP classes_arrow_large_binary;
+  static SEXP classes_arrow_fixed_size_binary;
+
+  static SEXP classes_arrow_list;
+  static SEXP classes_arrow_large_list;
+  static SEXP classes_arrow_fixed_size_list;
+
+  static SEXP classes_factor;
+  static SEXP classes_ordered;
+
+  static SEXP names_metadata;
+  static SEXP empty_raw;
+};
+
+struct ns {
+  static SEXP arrow;
+};
+
+template <typename Pointer>
+Pointer r6_to_pointer(SEXP self) {
+  return reinterpret_cast<Pointer>(
+      R_ExternalPtrAddr(Rf_findVarInFrame(self, arrow::r::symbols::xp)));
+}
+
+template <typename T, template <class> class SmartPtr>
+class ConstRefSmartPtrInput {
+ public:
+  using const_reference = const SmartPtr<T>&;
+
+  explicit ConstRefSmartPtrInput(SEXP self)
+      : ptr(r6_to_pointer<const SmartPtr<T>*>(self)) {}
+
+  inline operator const_reference() { return *ptr; }
+
+ private:
+  // this class host
+  const SmartPtr<T>* ptr;
+};
+
+template <typename T, template <class> class SmartPtr>
+class ConstRefVectorSmartPtrInput {
+ public:
+  using const_reference = const std::vector<SmartPtr<T>>&;
+
+  explicit ConstRefVectorSmartPtrInput(SEXP self) : vec() {
+    R_xlen_t n = XLENGTH(self);
+    for (R_xlen_t i = 0; i < n; i++) {
+      vec.push_back(*r6_to_pointer<const SmartPtr<T>*>(VECTOR_ELT(self, i)));
+    }
+  }
+
+  inline operator const_reference() { return vec; }
+
+ private:
+  std::vector<SmartPtr<T>> vec;
+};

Review comment:
       Similarly, this class doesn't need to be specific to smart pointers:
   ```suggestion
   template <typename T>
   class VectorExternalPtrInput {
    public:
     explicit VectorExternalPtrInput(SEXP self) : vec_(XLENGTH(self)) {
       R_xlen_t i = 0;
       for (auto& element : vec_) {
         element = *r6_to_pointer<const T*>(VECTOR_ELT(self, i++));
       }
     }
   
     operator const std::vector<T>&() const { return vec_; }
   
    private:
     std::vector<T> vec_;
   };
   ```
   used as:
   ```c++
   VectorExternalPtrInput<std::shared_ptr<Array>> array_vector{chunks};
   ```

##########
File path: r/src/arrow_cpp11.h
##########
@@ -0,0 +1,243 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+#undef Free
+
+namespace cpp11 {
+
+template <typename T>
+SEXP as_sexp(const std::shared_ptr<T>& ptr);
+
+template <typename T>
+SEXP as_sexp(const std::vector<std::shared_ptr<T>>& vec);
+
+template <typename E, typename std::enable_if<std::is_enum<E>::value>::type* = nullptr>
+SEXP as_sexp(E e);
+

Review comment:
       ```suggestion
   ```
   can this be removed after https://github.com/r-lib/cpp11/pull/65 ?

##########
File path: r/src/arrow_cpp11.h
##########
@@ -0,0 +1,243 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+#undef Free
+
+namespace cpp11 {
+
+template <typename T>
+SEXP as_sexp(const std::shared_ptr<T>& ptr);
+
+template <typename T>
+SEXP as_sexp(const std::vector<std::shared_ptr<T>>& vec);
+
+template <typename E, typename std::enable_if<std::is_enum<E>::value>::type* = nullptr>
+SEXP as_sexp(E e);
+
+}  // namespace cpp11
+
+#include <cpp11.hpp>
+
+namespace arrow {
+namespace r {
+struct symbols {
+  static SEXP units;
+  static SEXP tzone;
+  static SEXP xp;
+  static SEXP dot_Internal;
+  static SEXP inspect;
+  static SEXP row_names;
+  static SEXP serialize_arrow_r_metadata;
+  static SEXP as_list;
+  static SEXP ptype;
+  static SEXP byte_width;
+  static SEXP list_size;
+};
+
+struct data {
+  static SEXP classes_POSIXct;
+  static SEXP classes_metadata_r;
+  static SEXP classes_vctrs_list_of;
+  static SEXP classes_tbl_df;
+
+  static SEXP classes_arrow_binary;
+  static SEXP classes_arrow_large_binary;
+  static SEXP classes_arrow_fixed_size_binary;
+
+  static SEXP classes_arrow_list;
+  static SEXP classes_arrow_large_list;
+  static SEXP classes_arrow_fixed_size_list;
+
+  static SEXP classes_factor;
+  static SEXP classes_ordered;
+
+  static SEXP names_metadata;
+  static SEXP empty_raw;
+};
+
+struct ns {
+  static SEXP arrow;
+};
+
+template <typename Pointer>
+Pointer r6_to_pointer(SEXP self) {
+  return reinterpret_cast<Pointer>(
+      R_ExternalPtrAddr(Rf_findVarInFrame(self, arrow::r::symbols::xp)));
+}
+
+template <typename T, template <class> class SmartPtr>
+class ConstRefSmartPtrInput {
+ public:
+  using const_reference = const SmartPtr<T>&;
+
+  explicit ConstRefSmartPtrInput(SEXP self)
+      : ptr(r6_to_pointer<const SmartPtr<T>*>(self)) {}
+
+  inline operator const_reference() { return *ptr; }
+
+ private:
+  // this class host
+  const SmartPtr<T>* ptr;
+};

Review comment:
       I don't think this class needs to be specific to smart pointers:
   ```suggestion
   template <typename T>
   class ExternalPtrInput {
    public:
     explicit ExternalPtrInput(SEXP self)
         : ptr_(r6_to_pointer<const T*>(self)) {}
   
     operator const T&() const { return *ptr_; }
   
    private:
     // this class host
     const T* ptr_;
   };
   ```
   
   Then we can use it like:
   ```c++
   ExternalPtrInput<std::shared_ptr<Array>> array{array_sexp};
   ```

##########
File path: r/src/array_from_vector.cpp
##########
@@ -1064,42 +1063,42 @@ class FixedSizeBinaryVectorConverter : public VectorConverter {
   FixedSizeBinaryBuilder* typed_builder_;
 };
 
-template <typename Builder>
+template <typename StringBuilder>
 class StringVectorConverter : public VectorConverter {
  public:
   ~StringVectorConverter() {}
 
   Status Init(ArrayBuilder* builder) {
-    typed_builder_ = checked_cast<Builder*>(builder);
+    typed_builder_ = checked_cast<StringBuilder*>(builder);
     return Status::OK();
   }
 
   Status Ingest(SEXP obj) {
     ARROW_RETURN_IF(TYPEOF(obj) != STRSXP,
                     Status::RError("Expecting a character vector"));
-    R_xlen_t n = XLENGTH(obj);
 
-    // Reserve enough space before appending
-    int64_t size = 0;
-    for (R_xlen_t i = 0; i < n; i++) {
-      SEXP string_i = STRING_ELT(obj, i);
-      if (string_i != NA_STRING) {
-        size += XLENGTH(Rf_mkCharCE(Rf_translateCharUTF8(string_i), CE_UTF8));
-      }
+    cpp11::strings s(obj);
+    RETURN_NOT_OK(typed_builder_->Reserve(s.size()));
+
+    // note: the total length is calculated without utf8
+    //       conversion, so see this more as a hint rather than
+    //       the actual total length
+    auto total_length_hint = 0;
+    for (cpp11::r_string si : s) {
+      total_length_hint += (si == NA_STRING) ? 0 : si.size();

Review comment:
       Style question: should we prefer `cpp11` utilities over things like `NA_STRING`?
   ```suggestion
         total_length_hint += cpp11::is_na(si) ? 0 : si.size();
   ```

##########
File path: r/src/arrow_cpp11.h
##########
@@ -0,0 +1,243 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+#undef Free
+
+namespace cpp11 {
+
+template <typename T>
+SEXP as_sexp(const std::shared_ptr<T>& ptr);
+
+template <typename T>
+SEXP as_sexp(const std::vector<std::shared_ptr<T>>& vec);
+
+template <typename E, typename std::enable_if<std::is_enum<E>::value>::type* = nullptr>
+SEXP as_sexp(E e);
+
+}  // namespace cpp11
+
+#include <cpp11.hpp>
+
+namespace arrow {
+namespace r {
+struct symbols {
+  static SEXP units;
+  static SEXP tzone;
+  static SEXP xp;
+  static SEXP dot_Internal;
+  static SEXP inspect;
+  static SEXP row_names;
+  static SEXP serialize_arrow_r_metadata;
+  static SEXP as_list;
+  static SEXP ptype;
+  static SEXP byte_width;
+  static SEXP list_size;
+};
+
+struct data {
+  static SEXP classes_POSIXct;
+  static SEXP classes_metadata_r;
+  static SEXP classes_vctrs_list_of;
+  static SEXP classes_tbl_df;
+
+  static SEXP classes_arrow_binary;
+  static SEXP classes_arrow_large_binary;
+  static SEXP classes_arrow_fixed_size_binary;
+
+  static SEXP classes_arrow_list;
+  static SEXP classes_arrow_large_list;
+  static SEXP classes_arrow_fixed_size_list;
+
+  static SEXP classes_factor;
+  static SEXP classes_ordered;
+
+  static SEXP names_metadata;
+  static SEXP empty_raw;
+};
+
+struct ns {
+  static SEXP arrow;
+};
+
+template <typename Pointer>
+Pointer r6_to_pointer(SEXP self) {
+  return reinterpret_cast<Pointer>(
+      R_ExternalPtrAddr(Rf_findVarInFrame(self, arrow::r::symbols::xp)));
+}
+
+template <typename T, template <class> class SmartPtr>
+class ConstRefSmartPtrInput {
+ public:
+  using const_reference = const SmartPtr<T>&;
+
+  explicit ConstRefSmartPtrInput(SEXP self)
+      : ptr(r6_to_pointer<const SmartPtr<T>*>(self)) {}
+
+  inline operator const_reference() { return *ptr; }
+
+ private:
+  // this class host
+  const SmartPtr<T>* ptr;
+};
+
+template <typename T, template <class> class SmartPtr>
+class ConstRefVectorSmartPtrInput {
+ public:
+  using const_reference = const std::vector<SmartPtr<T>>&;
+
+  explicit ConstRefVectorSmartPtrInput(SEXP self) : vec() {
+    R_xlen_t n = XLENGTH(self);
+    for (R_xlen_t i = 0; i < n; i++) {
+      vec.push_back(*r6_to_pointer<const SmartPtr<T>*>(VECTOR_ELT(self, i)));
+    }
+  }
+
+  inline operator const_reference() { return vec; }
+
+ private:
+  std::vector<SmartPtr<T>> vec;
+};
+
+template <typename T>
+class default_input {

Review comment:
       These are named in snake case, which differs from `ConstRefSmartPtrInput`. Is that intentional? I'm fine with either convention, but we should probably stick with one.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org