You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/05/04 22:30:01 UTC

[GitHub] [arrow] wesm commented on a change in pull request #6959: ARROW-5649: [Integration][C++] Create integration test for extension types

wesm commented on a change in pull request #6959:
URL: https://github.com/apache/arrow/pull/6959#discussion_r419765783



##########
File path: cpp/src/arrow/util/key_value_metadata.cc
##########
@@ -94,11 +94,42 @@ Result<std::string> KeyValueMetadata::Get(const std::string& key) const {
   }
 }
 
+Status KeyValueMetadata::Delete(int64_t index) {
+  keys_.erase(keys_.begin() + index);
+  values_.erase(values_.begin() + index);
+  return Status::OK();
+}
+
+Status KeyValueMetadata::DeleteMany(std::vector<int64_t> indices) {
+  std::sort(indices.begin(), indices.end());
+  const int64_t size = static_cast<int64_t>(keys_.size());
+  indices.push_back(size);
+
+  int64_t shift = 0;
+  for (int64_t i = 0; i < static_cast<int64_t>(indices.size() - 1); ++i) {
+    ++shift;
+    const auto start = indices[i] + 1;
+    const auto stop = indices[i + 1];
+    DCHECK_GE(start, 0);
+    DCHECK_LE(start, size);
+    DCHECK_GE(stop, 0);
+    DCHECK_LE(stop, size);
+    for (int64_t index = start; index < stop; ++index) {
+      keys_[index - shift] = std::move(keys_[index]);
+      values_[index - shift] = std::move(values_[index]);
+    }
+  }
+  keys_.resize(size - shift);
+  values_.resize(size - shift);
+  return Status::OK();
+}
+
 Status KeyValueMetadata::Delete(const std::string& key) {
   auto index = FindKey(key);
   if (index < 0) {
     return Status::KeyError(key);
   } else {
+    return Delete(index);
     keys_.erase(keys_.begin() + index);
     values_.erase(values_.begin() + index);
     return Status::OK();

Review comment:
       These lines will never execute

##########
File path: cpp/src/parquet/arrow/reader_internal.cc
##########
@@ -634,30 +639,23 @@ Status ApplyOriginalMetadata(std::shared_ptr<Field> field, const Field& origin_f
     field = field->WithType(
         ::arrow::dictionary(::arrow::int32(), field->type(), dict_origin_type.ordered()));
   }
-  // restore field metadata
+
+  if (origin_type->id() == ::arrow::Type::EXTENSION) {
+    // Restore extension type, if the storage type is as read from Parquet
+    const auto& ex_type = checked_cast<const ::arrow::ExtensionType&>(*origin_type);
+    if (ex_type.storage_type()->Equals(*field->type())) {
+      field = field->WithType(origin_type);
+    }
+  }
+
+  // Restore field metadata
   std::shared_ptr<const KeyValueMetadata> field_metadata = origin_field.metadata();
   if (field_metadata != nullptr) {
     if (field->metadata()) {
       // Prefer the metadata keys (like field_id) from the current metadata
       field_metadata = field_metadata->Merge(*field->metadata());
     }
     field = field->WithMetadata(field_metadata);
-
-    // extension type
-    int name_index = field_metadata->FindKey(::arrow::kExtensionTypeKeyName);
-    if (name_index != -1) {
-      std::string type_name = field_metadata->value(name_index);
-      int data_index = field_metadata->FindKey(::arrow::kExtensionMetadataKeyName);
-      std::string type_data = data_index == -1 ? "" : field_metadata->value(data_index);
-
-      std::shared_ptr<::arrow::ExtensionType> ext_type =
-          ::arrow::GetExtensionType(type_name);
-      if (ext_type != nullptr) {
-        ARROW_ASSIGN_OR_RAISE(auto deserialized,
-                              ext_type->Deserialize(field->type(), type_data));
-        field = field->WithType(deserialized);
-      }
-    }

Review comment:
       Nice




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org