You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by sh...@apache.org on 2019/03/15 00:03:24 UTC

[arrow] branch master updated: ARROW-4862: [GLib] Add GArrowCastOptions::allow-invalid-utf8 property

This is an automated email from the ASF dual-hosted git repository.

shiro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 99a47ab  ARROW-4862: [GLib] Add GArrowCastOptions::allow-invalid-utf8 property
99a47ab is described below

commit 99a47ab1f8d3a89e5f49006f072c9fba276858e1
Author: Kouhei Sutou <ko...@clear-code.com>
AuthorDate: Fri Mar 15 09:03:11 2019 +0900

    ARROW-4862: [GLib] Add GArrowCastOptions::allow-invalid-utf8 property
    
    Author: Kouhei Sutou <ko...@clear-code.com>
    
    Closes #3894 from kou/glib-cast-options-allow-invalid-utf8 and squashes the following commits:
    
    9fc06744 <Kouhei Sutou>  Add GArrowCastOptions::allow-invalid-utf8 property
---
 c_glib/arrow-glib/compute.cpp | 24 +++++++++++++++++++++++-
 c_glib/test/test-cast.rb      | 17 +++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 2039eea..a9f6721 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -42,7 +42,8 @@ enum {
   PROP_0,
   PROP_ALLOW_INT_OVERFLOW,
   PROP_ALLOW_TIME_TRUNCATE,
-  PROP_ALLOW_FLOAT_TRUNCATE
+  PROP_ALLOW_FLOAT_TRUNCATE,
+  PROP_ALLOW_INVALID_UTF8,
 };
 
 G_DEFINE_TYPE_WITH_PRIVATE(GArrowCastOptions,
@@ -72,6 +73,9 @@ garrow_cast_options_set_property(GObject *object,
   case PROP_ALLOW_FLOAT_TRUNCATE:
     priv->options.allow_float_truncate = g_value_get_boolean(value);
     break;
+  case PROP_ALLOW_INVALID_UTF8:
+    priv->options.allow_invalid_utf8 = g_value_get_boolean(value);
+    break;
   default:
     G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
     break;
@@ -96,6 +100,9 @@ garrow_cast_options_get_property(GObject *object,
   case PROP_ALLOW_FLOAT_TRUNCATE:
     g_value_set_boolean(value, priv->options.allow_float_truncate);
     break;
+  case PROP_ALLOW_INVALID_UTF8:
+    g_value_set_boolean(value, priv->options.allow_invalid_utf8);
+    break;
   default:
     G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
     break;
@@ -157,6 +164,20 @@ garrow_cast_options_class_init(GArrowCastOptionsClass *klass)
                               FALSE,
                               static_cast<GParamFlags>(G_PARAM_READWRITE));
   g_object_class_install_property(gobject_class, PROP_ALLOW_FLOAT_TRUNCATE, spec);
+
+  /**
+   * GArrowCastOptions:allow-invalid-utf8:
+   *
+   * Whether invalid UTF-8 string value is allowed or not.
+   *
+   * Since: 0.13.0
+   */
+  spec = g_param_spec_boolean("allow-invalid-utf8",
+                              "Allow invalid UTF-8",
+                              "Whether invalid UTF-8 string value is allowed or not",
+                              FALSE,
+                              static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_ALLOW_INVALID_UTF8, spec);
 }
 
 /**
@@ -183,6 +204,7 @@ garrow_cast_options_new_raw(arrow::compute::CastOptions *arrow_cast_options)
                  "allow-int-overflow", arrow_cast_options->allow_int_overflow,
                  "allow-time-truncate", arrow_cast_options->allow_time_truncate,
                  "allow-float-truncate", arrow_cast_options->allow_float_truncate,
+                 "allow-invalid-utf8", arrow_cast_options->allow_invalid_utf8,
                  NULL);
   return GARROW_CAST_OPTIONS(cast_options);
 }
diff --git a/c_glib/test/test-cast.rb b/c_glib/test/test-cast.rb
index 2512e05..f9d406c 100644
--- a/c_glib/test/test-cast.rb
+++ b/c_glib/test/test-cast.rb
@@ -82,4 +82,21 @@ class TestCast < Test::Unit::TestCase
                    build_float_array([1.1]).cast(int8_data_type, options))
     end
   end
+
+  sub_test_case("allow-invalid-utf8") do
+    def test_default
+      require_gi(1, 42, 0)
+      assert_raise(Arrow::Error::Invalid) do
+        build_binary_array(["\xff"]).cast(Arrow::StringDataType.new)
+      end
+    end
+
+    def test_true
+      options = Arrow::CastOptions.new
+      options.allow_invalid_utf8 = true
+      string_data_type = Arrow::StringDataType.new
+      assert_equal(build_string_array(["\xff"]),
+                   build_binary_array(["\xff"]).cast(string_data_type, options))
+    end
+  end
 end