You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/12/20 23:48:05 UTC
[arrow-rs] branch master updated: Infer JSON as UTF-8 (#3376)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new a8968cd26 Infer JSON as UTF-8 (#3376)
a8968cd26 is described below
commit a8968cd2677d7515915e9d33549b13dfb4a5b2ae
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Tue Dec 20 23:48:00 2022 +0000
Infer JSON as UTF-8 (#3376)
---
parquet/src/arrow/{schema.rs => schema/mod.rs} | 2 ++
parquet/src/arrow/schema/primitive.rs | 4 ++--
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema/mod.rs
similarity index 99%
rename from parquet/src/arrow/schema.rs
rename to parquet/src/arrow/schema/mod.rs
index 464b86d0c..120612822 100644
--- a/parquet/src/arrow/schema.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -526,6 +526,7 @@ mod tests {
OPTIONAL FLOAT float;
OPTIONAL BINARY string (UTF8);
OPTIONAL BINARY string_2 (STRING);
+ OPTIONAL BINARY json (JSON);
}
";
let parquet_group_type = parse_message_type(message_type).unwrap();
@@ -546,6 +547,7 @@ mod tests {
Field::new("float", DataType::Float32, true),
Field::new("string", DataType::Utf8, true),
Field::new("string_2", DataType::Utf8, true),
+ Field::new("json", DataType::Utf8, true),
];
assert_eq!(&arrow_fields, converted_arrow_schema.fields());
diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs
index e5bab9ac9..bd56583a8 100644
--- a/parquet/src/arrow/schema/primitive.rs
+++ b/parquet/src/arrow/schema/primitive.rs
@@ -227,11 +227,11 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result<DataType> {
match (info.logical_type(), info.converted_type()) {
(Some(LogicalType::String), _) => Ok(DataType::Utf8),
- (Some(LogicalType::Json), _) => Ok(DataType::Binary),
+ (Some(LogicalType::Json), _) => Ok(DataType::Utf8),
(Some(LogicalType::Bson), _) => Ok(DataType::Binary),
(Some(LogicalType::Enum), _) => Ok(DataType::Binary),
(None, ConvertedType::NONE) => Ok(DataType::Binary),
- (None, ConvertedType::JSON) => Ok(DataType::Binary),
+ (None, ConvertedType::JSON) => Ok(DataType::Utf8),
(None, ConvertedType::BSON) => Ok(DataType::Binary),
(None, ConvertedType::ENUM) => Ok(DataType::Binary),
(None, ConvertedType::UTF8) => Ok(DataType::Utf8),