You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/12/20 23:48:05 UTC

[arrow-rs] branch master updated: Infer JSON as UTF-8 (#3376)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new a8968cd26 Infer JSON as UTF-8 (#3376)
a8968cd26 is described below

commit a8968cd2677d7515915e9d33549b13dfb4a5b2ae
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Tue Dec 20 23:48:00 2022 +0000

    Infer JSON as UTF-8 (#3376)
---
 parquet/src/arrow/{schema.rs => schema/mod.rs} | 2 ++
 parquet/src/arrow/schema/primitive.rs          | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema/mod.rs
similarity index 99%
rename from parquet/src/arrow/schema.rs
rename to parquet/src/arrow/schema/mod.rs
index 464b86d0c..120612822 100644
--- a/parquet/src/arrow/schema.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -526,6 +526,7 @@ mod tests {
             OPTIONAL FLOAT   float;
             OPTIONAL BINARY  string (UTF8);
             OPTIONAL BINARY  string_2 (STRING);
+            OPTIONAL BINARY  json (JSON);
         }
         ";
         let parquet_group_type = parse_message_type(message_type).unwrap();
@@ -546,6 +547,7 @@ mod tests {
             Field::new("float", DataType::Float32, true),
             Field::new("string", DataType::Utf8, true),
             Field::new("string_2", DataType::Utf8, true),
+            Field::new("json", DataType::Utf8, true),
         ];
 
         assert_eq!(&arrow_fields, converted_arrow_schema.fields());
diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs
index e5bab9ac9..bd56583a8 100644
--- a/parquet/src/arrow/schema/primitive.rs
+++ b/parquet/src/arrow/schema/primitive.rs
@@ -227,11 +227,11 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
 fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result<DataType> {
     match (info.logical_type(), info.converted_type()) {
         (Some(LogicalType::String), _) => Ok(DataType::Utf8),
-        (Some(LogicalType::Json), _) => Ok(DataType::Binary),
+        (Some(LogicalType::Json), _) => Ok(DataType::Utf8),
         (Some(LogicalType::Bson), _) => Ok(DataType::Binary),
         (Some(LogicalType::Enum), _) => Ok(DataType::Binary),
         (None, ConvertedType::NONE) => Ok(DataType::Binary),
-        (None, ConvertedType::JSON) => Ok(DataType::Binary),
+        (None, ConvertedType::JSON) => Ok(DataType::Utf8),
         (None, ConvertedType::BSON) => Ok(DataType::Binary),
         (None, ConvertedType::ENUM) => Ok(DataType::Binary),
         (None, ConvertedType::UTF8) => Ok(DataType::Utf8),