You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/04 22:17:56 UTC
[arrow-rs] branch master updated: Split out arrow-cast (#2594) (#2998)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new cdc8d0e58 Split out arrow-cast (#2594) (#2998)
cdc8d0e58 is described below
commit cdc8d0e58d51d1770d0e5363ba2985bbd417daa9
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Sat Nov 5 11:17:50 2022 +1300
Split out arrow-cast (#2594) (#2998)
* Split out arrow-cast (#2594)
* Format
---
.github/workflows/arrow.yml | 5 +
.github/workflows/arrow_flight.yml | 1 +
.github/workflows/dev_pr/labeler.yml | 1 +
.github/workflows/integration.yml | 1 +
.github/workflows/miri.yaml | 1 +
.github/workflows/parquet.yml | 1 +
Cargo.toml | 1 +
arrow-cast/Cargo.toml | 52 +++++
.../src/compute/kernels => arrow-cast/src}/cast.rs | 233 +++++++++++----------
{arrow/src/util => arrow-cast/src}/display.rs | 72 ++++---
.../util/serialization.rs => arrow-cast/src/lib.rs | 22 +-
.../cast_utils.rs => arrow-cast/src/parse.rs | 52 ++---
arrow/Cargo.toml | 1 +
arrow/src/compute/kernels/mod.rs | 4 +-
arrow/src/csv/writer.rs | 4 +-
arrow/src/lib.rs | 1 +
arrow/src/util/mod.rs | 3 +-
arrow/src/util/reader_parser.rs | 5 +-
18 files changed, 259 insertions(+), 201 deletions(-)
diff --git a/.github/workflows/arrow.yml b/.github/workflows/arrow.yml
index 868741c33..9ae72dd00 100644
--- a/.github/workflows/arrow.yml
+++ b/.github/workflows/arrow.yml
@@ -28,6 +28,7 @@ on:
- arrow/**
- arrow-array/**
- arrow-buffer/**
+ - arrow-cast/**
- arrow-data/**
- arrow-schema/**
- arrow-select/**
@@ -58,6 +59,8 @@ jobs:
run: cargo test -p arrow-array --all-features
- name: Test arrow-select with all features
run: cargo test -p arrow-select --all-features
+ - name: Test arrow-cast with all features
+ run: cargo test -p arrow-cast --all-features
- name: Test arrow-integration-test with all features
run: cargo test -p arrow-integration-test --all-features
- name: Test arrow with default features
@@ -164,5 +167,7 @@ jobs:
run: cargo clippy -p arrow-array --all-targets --all-features -- -D warnings
- name: Clippy arrow-select with all features
run: cargo clippy -p arrow-select --all-targets --all-features -- -D warnings
+ - name: Clippy arrow-cast with all features
+ run: cargo clippy -p arrow-cast --all-targets --all-features -- -D warnings
- name: Clippy arrow
run: cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi,ipc_compression,dyn_cmp_dict,dyn_arith_dict,chrono-tz --all-targets -- -D warnings
diff --git a/.github/workflows/arrow_flight.yml b/.github/workflows/arrow_flight.yml
index 548caeb2a..9621c9e69 100644
--- a/.github/workflows/arrow_flight.yml
+++ b/.github/workflows/arrow_flight.yml
@@ -30,6 +30,7 @@ on:
- arrow/**
- arrow-array/**
- arrow-buffer/**
+ - arrow-cast/**
- arrow-data/**
- arrow-schema/**
- arrow-select/**
diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml
index e44f5f803..3a0073004 100644
--- a/.github/workflows/dev_pr/labeler.yml
+++ b/.github/workflows/dev_pr/labeler.yml
@@ -19,6 +19,7 @@ arrow:
- arrow/**/*
- arrow-array/**/*
- arrow-buffer/**/*
+ - arrow-cast/**/*
- arrow-data/**/*
- arrow-schema/**/*
- arrow-select/**/*
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 7c1d2972f..c2c0a79e6 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -27,6 +27,7 @@ on:
- arrow/**
- arrow-array/**
- arrow-buffer/**
+ - arrow-cast/**
- arrow-data/**
- arrow-schema/**
- arrow-select/**
diff --git a/.github/workflows/miri.yaml b/.github/workflows/miri.yaml
index 435582347..241b4f0b4 100644
--- a/.github/workflows/miri.yaml
+++ b/.github/workflows/miri.yaml
@@ -27,6 +27,7 @@ on:
- arrow/**
- arrow-array/**
- arrow-buffer/**
+ - arrow-cast/**
- arrow-data/**
- arrow-schema/**
- arrow-select/**
diff --git a/.github/workflows/parquet.yml b/.github/workflows/parquet.yml
index dd1a782c4..5a7beadfd 100644
--- a/.github/workflows/parquet.yml
+++ b/.github/workflows/parquet.yml
@@ -30,6 +30,7 @@ on:
- arrow/**
- arrow-array/**
- arrow-buffer/**
+ - arrow-cast/**
- arrow-data/**
- arrow-schema/**
- arrow-select/**
diff --git a/Cargo.toml b/Cargo.toml
index 6f61b0e45..d8fa5b923 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,6 +20,7 @@ members = [
"arrow",
"arrow-array",
"arrow-buffer",
+ "arrow-cast",
"arrow-data",
"arrow-flight",
"arrow-integration-test",
diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml
new file mode 100644
index 000000000..714ea0b48
--- /dev/null
+++ b/arrow-cast/Cargo.toml
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "arrow-cast"
+version = "26.0.0"
+description = "Cast kernel and utilities for Apache Arrow"
+homepage = "https://github.com/apache/arrow-rs"
+repository = "https://github.com/apache/arrow-rs"
+authors = ["Apache Arrow <de...@arrow.apache.org>"]
+license = "Apache-2.0"
+keywords = ["arrow"]
+include = [
+ "benches/*.rs",
+ "src/**/*.rs",
+ "Cargo.toml",
+]
+edition = "2021"
+rust-version = "1.62"
+
+[lib]
+name = "arrow_cast"
+path = "src/lib.rs"
+bench = false
+
+[dependencies]
+arrow-array = { version = "26.0.0", path = "../arrow-array" }
+arrow-buffer = { version = "26.0.0", path = "../arrow-buffer" }
+arrow-data = { version = "26.0.0", path = "../arrow-data" }
+arrow-schema = { version = "26.0.0", path = "../arrow-schema" }
+arrow-select = { version = "26.0.0", path = "../arrow-select" }
+chrono = { version = "0.4", default-features = false, features = ["clock"] }
+num = { version = "0.4", default-features = false, features = ["std"] }
+lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] }
+
+[dev-dependencies]
+
+[build-dependencies]
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow-cast/src/cast.rs
similarity index 97%
rename from arrow/src/compute/kernels/cast.rs
rename to arrow-cast/src/cast.rs
index b1e744d26..a3abe545d 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -21,9 +21,9 @@
//! Example:
//!
//! ```
-//! use arrow::array::*;
-//! use arrow::compute::cast;
-//! use arrow::datatypes::DataType;
+//! use arrow_array::*;
+//! use arrow_cast::cast;
+//! use arrow_schema::DataType;
//! use std::sync::Arc;
//!
//! let a = Int32Array::from(vec![5, 6, 7]);
@@ -36,27 +36,18 @@
//! ```
use chrono::{DateTime, NaiveDateTime, Timelike};
-use std::str;
use std::sync::Arc;
-use crate::buffer::MutableBuffer;
-use crate::compute::kernels::cast_utils::string_to_timestamp_nanos;
-use crate::compute::{divide_scalar, multiply_scalar};
-use crate::compute::{try_unary, unary};
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-use crate::temporal_conversions::{
- as_datetime, EPOCH_DAYS_FROM_CE, MICROSECONDS, MILLISECONDS, MILLISECONDS_IN_DAY,
- NANOSECONDS, SECONDS_IN_DAY,
+use crate::display::{array_value_to_string, lexical_to_string};
+use crate::parse::string_to_timestamp_nanos;
+use arrow_array::{
+ builder::*, cast::*, iterator::ArrayIter, temporal_conversions::*, timezone::Tz,
+ types::*, *,
};
-use crate::{array::*, compute::take};
-use crate::{
- buffer::Buffer, util::display::array_value_to_string,
- util::serialization::lexical_to_string,
-};
-use arrow_array::temporal_conversions::as_datetime_with_timezone;
-use arrow_array::timezone::Tz;
-use arrow_buffer::i256;
+use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer};
+use arrow_data::ArrayData;
+use arrow_schema::*;
+use arrow_select::take::take;
use num::cast::AsPrimitive;
use num::{NumCast, ToPrimitive};
@@ -305,12 +296,12 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
/// * To or from `StructArray`
/// * List to primitive
/// * Interval and duration
-pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
+pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef, ArrowError> {
cast_with_options(array, to_type, &DEFAULT_CAST_OPTIONS)
}
fn cast_integer_to_decimal<
- T: ArrowNumericType,
+ T: ArrowPrimitiveType,
D: DecimalType + ArrowPrimitiveType<Native = M>,
M,
>(
@@ -319,7 +310,7 @@ fn cast_integer_to_decimal<
scale: u8,
base: M,
cast_options: &CastOptions,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
<T as ArrowPrimitiveType>::Native: AsPrimitive<M>,
M: ArrowNativeTypeOp,
@@ -342,42 +333,43 @@ where
.with_precision_and_scale(precision, scale)
.map(|a| Arc::new(a) as ArrayRef)
} else {
- try_unary::<T, _, D>(array, |v| v.as_().mul_checked(mul))
+ array
+ .try_unary::<_, D, _>(|v| v.as_().mul_checked(mul))
.and_then(|a| a.with_precision_and_scale(precision, scale))
.map(|a| Arc::new(a) as ArrayRef)
}
}
-fn cast_floating_point_to_decimal128<T: ArrowNumericType>(
+fn cast_floating_point_to_decimal128<T: ArrowPrimitiveType>(
array: &PrimitiveArray<T>,
precision: u8,
scale: u8,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
<T as ArrowPrimitiveType>::Native: AsPrimitive<f64>,
{
let mul = 10_f64.powi(scale as i32);
- unary::<T, _, Decimal128Type>(array, |v| (v.as_() * mul).round() as i128)
+ array
+ .unary::<_, Decimal128Type>(|v| (v.as_() * mul).round() as i128)
.with_precision_and_scale(precision, scale)
.map(|a| Arc::new(a) as ArrayRef)
}
-fn cast_floating_point_to_decimal256<T: ArrowNumericType>(
+fn cast_floating_point_to_decimal256<T: ArrowPrimitiveType>(
array: &PrimitiveArray<T>,
precision: u8,
scale: u8,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
<T as ArrowPrimitiveType>::Native: AsPrimitive<f64>,
{
let mul = 10_f64.powi(scale as i32);
- unary::<T, _, Decimal256Type>(array, |v| {
- i256::from_i128((v.as_() * mul).round() as i128)
- })
- .with_precision_and_scale(precision, scale)
- .map(|a| Arc::new(a) as ArrayRef)
+ array
+ .unary::<_, Decimal256Type>(|v| i256::from_i128((v.as_() * mul).round() as i128))
+ .with_precision_and_scale(precision, scale)
+ .map(|a| Arc::new(a) as ArrayRef)
}
/// Cast the primitive array using [`PrimitiveArray::reinterpret_cast`]
@@ -386,7 +378,7 @@ fn cast_reinterpret_arrays<
O: ArrowPrimitiveType<Native = I::Native>,
>(
array: &dyn Array,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
Ok(Arc::new(
as_primitive_array::<I>(array).reinterpret_cast::<O>(),
))
@@ -511,7 +503,7 @@ pub fn cast_with_options(
array: &ArrayRef,
to_type: &DataType,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
use DataType::*;
let from_type = array.data_type();
@@ -869,7 +861,7 @@ pub fn cast_with_options(
.iter()
.map(|maybe_value| match maybe_value {
Some(value) => {
- let result = str::from_utf8(value);
+ let result = std::str::from_utf8(value);
if cast_options.safe {
Ok(result.ok())
} else {
@@ -883,7 +875,7 @@ pub fn cast_with_options(
}
None => Ok(None),
})
- .collect::<Result<StringArray>>()?,
+ .collect::<Result<StringArray, _>>()?,
))
}
_ => Err(ArrowError::CastError(format!(
@@ -923,7 +915,7 @@ pub fn cast_with_options(
.iter()
.map(|maybe_value| match maybe_value {
Some(value) => {
- let result = str::from_utf8(value);
+ let result = std::str::from_utf8(value);
if cast_options.safe {
Ok(result.ok())
} else {
@@ -937,7 +929,7 @@ pub fn cast_with_options(
}
None => Ok(None),
})
- .collect::<Result<LargeStringArray>>()?,
+ .collect::<Result<LargeStringArray, _>>()?,
))
}
_ => Err(ArrowError::CastError(format!(
@@ -1394,9 +1386,11 @@ pub fn cast_with_options(
// we either divide or multiply, depending on size of each unit
// units are never the same when the types are the same
let converted = if from_size >= to_size {
- divide_scalar(time_array, from_size / to_size)?
+ let divisor = from_size / to_size;
+ time_array.unary::<_, Int64Type>(|o| o / divisor)
} else {
- multiply_scalar(time_array, to_size / from_size)?
+ let mul = to_size / from_size;
+ time_array.unary::<_, Int64Type>(|o| o * mul)
};
Ok(make_timestamp_array(
&converted,
@@ -1484,7 +1478,7 @@ pub fn cast_with_options(
}
/// Cast string array to binary array
-fn cast_string_to_binary(array: &ArrayRef) -> Result<ArrayRef> {
+fn cast_string_to_binary(array: &ArrayRef) -> Result<ArrayRef, ArrowError> {
let from_type = array.data_type();
match *from_type {
DataType::Utf8 => {
@@ -1534,7 +1528,7 @@ fn cast_decimal_to_decimal<const BYTE_WIDTH1: usize, const BYTE_WIDTH2: usize>(
input_scale: &u8,
output_precision: &u8,
output_scale: &u8,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
if input_scale > output_scale {
// For example, input_scale is 4 and output_scale is 3;
// Original value is 11234_i128, and will be cast to 1123_i128.
@@ -1575,7 +1569,7 @@ fn cast_decimal_to_decimal<const BYTE_WIDTH1: usize, const BYTE_WIDTH2: usize>(
.map(Some)
}
})
- .collect::<Result<Vec<_>>>()?;
+ .collect::<Result<Vec<_>, _>>()?;
let output_array = values
.into_iter()
@@ -1631,7 +1625,7 @@ fn cast_decimal_to_decimal<const BYTE_WIDTH1: usize, const BYTE_WIDTH2: usize>(
.map(Some)
}
})
- .collect::<Result<Vec<_>>>()?;
+ .collect::<Result<Vec<_>, _>>()?;
let output_array = values
.into_iter()
@@ -1654,10 +1648,10 @@ fn cast_decimal_to_decimal<const BYTE_WIDTH1: usize, const BYTE_WIDTH2: usize>(
fn cast_numeric_arrays<FROM, TO>(
from: &ArrayRef,
cast_options: &CastOptions,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
- FROM: ArrowNumericType,
- TO: ArrowNumericType,
+ FROM: ArrowPrimitiveType,
+ TO: ArrowPrimitiveType,
FROM::Native: NumCast,
TO::Native: NumCast,
{
@@ -1680,14 +1674,16 @@ where
// Natural cast between numeric types
// If the value of T can't be casted to R, will throw error
-fn try_numeric_cast<T, R>(from: &PrimitiveArray<T>) -> Result<PrimitiveArray<R>>
+fn try_numeric_cast<T, R>(
+ from: &PrimitiveArray<T>,
+) -> Result<PrimitiveArray<R>, ArrowError>
where
- T: ArrowNumericType,
- R: ArrowNumericType,
+ T: ArrowPrimitiveType,
+ R: ArrowPrimitiveType,
T::Native: NumCast,
R::Native: NumCast,
{
- try_unary(from, |value| {
+ from.try_unary(|value| {
num::cast::cast::<T::Native, R::Native>(value).ok_or_else(|| {
ArrowError::CastError(format!(
"Can't cast value {:?} to type {}",
@@ -1702,8 +1698,8 @@ where
// If the value of T can't be casted to R, it will be converted to null
fn numeric_cast<T, R>(from: &PrimitiveArray<T>) -> PrimitiveArray<R>
where
- T: ArrowNumericType,
- R: ArrowNumericType,
+ T: ArrowPrimitiveType,
+ R: ArrowPrimitiveType,
T::Native: NumCast,
R::Native: NumCast,
{
@@ -1754,7 +1750,7 @@ fn extract_component_from_datetime_array<
mut builder: GenericStringBuilder<OffsetSize>,
tz: &str,
op: F,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
OffsetSize: OffsetSizeTrait,
F: Fn(DateTime<Tz>) -> String,
@@ -1781,9 +1777,9 @@ where
fn cast_timestamp_to_string<T, OffsetSize>(
array: &ArrayRef,
tz: &Option<String>,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
- T: ArrowTemporalType + ArrowNumericType,
+ T: ArrowTemporalType + ArrowPrimitiveType,
i64: From<<T as ArrowPrimitiveType>::Native>,
OffsetSize: OffsetSizeTrait,
{
@@ -1816,7 +1812,7 @@ where
/// Cast date32 types to Utf8/LargeUtf8
fn cast_date32_to_string<OffsetSize: OffsetSizeTrait>(
array: &ArrayRef,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
let array = array.as_any().downcast_ref::<Date32Array>().unwrap();
Ok(Arc::new(
@@ -1835,7 +1831,7 @@ fn cast_date32_to_string<OffsetSize: OffsetSizeTrait>(
/// Cast date64 types to Utf8/LargeUtf8
fn cast_date64_to_string<OffsetSize: OffsetSizeTrait>(
array: &ArrayRef,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
let array = array.as_any().downcast_ref::<Date64Array>().unwrap();
Ok(Arc::new(
@@ -1852,9 +1848,11 @@ fn cast_date64_to_string<OffsetSize: OffsetSizeTrait>(
}
/// Cast numeric types to Utf8
-fn cast_numeric_to_string<FROM, OffsetSize>(array: &ArrayRef) -> Result<ArrayRef>
+fn cast_numeric_to_string<FROM, OffsetSize>(
+ array: &ArrayRef,
+) -> Result<ArrayRef, ArrowError>
where
- FROM: ArrowNumericType,
+ FROM: ArrowPrimitiveType,
FROM::Native: lexical_core::ToLexical,
OffsetSize: OffsetSizeTrait,
{
@@ -1870,7 +1868,7 @@ fn numeric_to_string_cast<T, OffsetSize>(
from: &PrimitiveArray<T>,
) -> GenericStringArray<OffsetSize>
where
- T: ArrowPrimitiveType + ArrowNumericType,
+ T: ArrowPrimitiveType + ArrowPrimitiveType,
T::Native: lexical_core::ToLexical,
OffsetSize: OffsetSizeTrait,
{
@@ -1883,9 +1881,9 @@ where
fn cast_string_to_numeric<T, Offset: OffsetSizeTrait>(
from: &ArrayRef,
cast_options: &CastOptions,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
- T: ArrowNumericType,
+ T: ArrowPrimitiveType,
<T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
{
Ok(Arc::new(string_to_numeric_cast::<T, Offset>(
@@ -1899,9 +1897,9 @@ where
fn string_to_numeric_cast<T, Offset: OffsetSizeTrait>(
from: &GenericStringArray<Offset>,
cast_options: &CastOptions,
-) -> Result<PrimitiveArray<T>>
+) -> Result<PrimitiveArray<T>, ArrowError>
where
- T: ArrowNumericType,
+ T: ArrowPrimitiveType,
<T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
{
if cast_options.safe {
@@ -1928,7 +1926,7 @@ where
})
.transpose()
})
- .collect::<Result<Vec<_>>>()?;
+ .collect::<Result<Vec<_>, _>>()?;
// Benefit:
// 20% performance improvement
// Soundness:
@@ -1941,7 +1939,7 @@ where
fn cast_string_to_date32<Offset: OffsetSizeTrait>(
array: &dyn Array,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
use chrono::Datelike;
let string_array = array
.as_any()
@@ -1979,7 +1977,7 @@ fn cast_string_to_date32<Offset: OffsetSizeTrait>(
})
.transpose()
})
- .collect::<Result<Vec<Option<i32>>>>()?;
+ .collect::<Result<Vec<Option<i32>>, _>>()?;
// Benefit:
// 20% performance improvement
@@ -1995,7 +1993,7 @@ fn cast_string_to_date32<Offset: OffsetSizeTrait>(
fn cast_string_to_date64<Offset: OffsetSizeTrait>(
array: &dyn Array,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
let string_array = array
.as_any()
.downcast_ref::<GenericStringArray<Offset>>()
@@ -2032,7 +2030,7 @@ fn cast_string_to_date64<Offset: OffsetSizeTrait>(
})
.transpose()
})
- .collect::<Result<Vec<Option<i64>>>>()?;
+ .collect::<Result<Vec<Option<i64>>, _>>()?;
// Benefit:
// 20% performance improvement
@@ -2048,7 +2046,7 @@ fn cast_string_to_date64<Offset: OffsetSizeTrait>(
fn cast_string_to_time32second<Offset: OffsetSizeTrait>(
array: &dyn Array,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
/// The number of nanoseconds per second.
const NANOS_PER_SEC: u32 = 1_000_000_000;
@@ -2096,7 +2094,7 @@ fn cast_string_to_time32second<Offset: OffsetSizeTrait>(
})
.transpose()
})
- .collect::<Result<Vec<Option<i32>>>>()?;
+ .collect::<Result<Vec<Option<i32>>, _>>()?;
// Benefit:
// 20% performance improvement
@@ -2112,7 +2110,7 @@ fn cast_string_to_time32second<Offset: OffsetSizeTrait>(
fn cast_string_to_time32millisecond<Offset: OffsetSizeTrait>(
array: &dyn Array,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
/// The number of nanoseconds per millisecond.
const NANOS_PER_MILLI: u32 = 1_000_000;
/// The number of milliseconds per second.
@@ -2162,7 +2160,7 @@ fn cast_string_to_time32millisecond<Offset: OffsetSizeTrait>(
})
.transpose()
})
- .collect::<Result<Vec<Option<i32>>>>()?;
+ .collect::<Result<Vec<Option<i32>>, _>>()?;
// Benefit:
// 20% performance improvement
@@ -2178,7 +2176,7 @@ fn cast_string_to_time32millisecond<Offset: OffsetSizeTrait>(
fn cast_string_to_time64microsecond<Offset: OffsetSizeTrait>(
array: &dyn Array,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
/// The number of nanoseconds per microsecond.
const NANOS_PER_MICRO: i64 = 1_000;
/// The number of microseconds per second.
@@ -2226,7 +2224,7 @@ fn cast_string_to_time64microsecond<Offset: OffsetSizeTrait>(
})
.transpose()
})
- .collect::<Result<Vec<Option<i64>>>>()?;
+ .collect::<Result<Vec<Option<i64>>, _>>()?;
// Benefit:
// 20% performance improvement
@@ -2242,7 +2240,7 @@ fn cast_string_to_time64microsecond<Offset: OffsetSizeTrait>(
fn cast_string_to_time64nanosecond<Offset: OffsetSizeTrait>(
array: &dyn Array,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
/// The number of nanoseconds per second.
const NANOS_PER_SEC: i64 = 1_000_000_000;
@@ -2288,7 +2286,7 @@ fn cast_string_to_time64nanosecond<Offset: OffsetSizeTrait>(
})
.transpose()
})
- .collect::<Result<Vec<Option<i64>>>>()?;
+ .collect::<Result<Vec<Option<i64>>, _>>()?;
// Benefit:
// 20% performance improvement
@@ -2304,7 +2302,7 @@ fn cast_string_to_time64nanosecond<Offset: OffsetSizeTrait>(
fn cast_string_to_timestamp_ns<Offset: OffsetSizeTrait>(
array: &dyn Array,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
let string_array = array
.as_any()
.downcast_ref::<GenericStringArray<Offset>>()
@@ -2323,7 +2321,7 @@ fn cast_string_to_timestamp_ns<Offset: OffsetSizeTrait>(
let vec = string_array
.iter()
.map(|v| v.map(string_to_timestamp_nanos).transpose())
- .collect::<Result<Vec<Option<i64>>>>()?;
+ .collect::<Result<Vec<Option<i64>>, _>>()?;
// Benefit:
// 20% performance improvement
@@ -2336,7 +2334,10 @@ fn cast_string_to_timestamp_ns<Offset: OffsetSizeTrait>(
}
/// Casts Utf8 to Boolean
-fn cast_utf8_to_boolean(from: &ArrayRef, cast_options: &CastOptions) -> Result<ArrayRef> {
+fn cast_utf8_to_boolean(
+ from: &ArrayRef,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
let array = as_string_array(from);
let output_array = array
@@ -2358,7 +2359,7 @@ fn cast_utf8_to_boolean(from: &ArrayRef, cast_options: &CastOptions) -> Result<A
},
None => Ok(None),
})
- .collect::<Result<BooleanArray>>()?;
+ .collect::<Result<BooleanArray, _>>()?;
Ok(Arc::new(output_array))
}
@@ -2366,9 +2367,9 @@ fn cast_utf8_to_boolean(from: &ArrayRef, cast_options: &CastOptions) -> Result<A
/// Cast numeric types to Boolean
///
/// Any zero value returns `false` while non-zero returns `true`
-fn cast_numeric_to_bool<FROM>(from: &ArrayRef) -> Result<ArrayRef>
+fn cast_numeric_to_bool<FROM>(from: &ArrayRef) -> Result<ArrayRef, ArrowError>
where
- FROM: ArrowNumericType,
+ FROM: ArrowPrimitiveType,
{
numeric_to_bool_cast::<FROM>(
from.as_any()
@@ -2378,9 +2379,9 @@ where
.map(|to| Arc::new(to) as ArrayRef)
}
-fn numeric_to_bool_cast<T>(from: &PrimitiveArray<T>) -> Result<BooleanArray>
+fn numeric_to_bool_cast<T>(from: &PrimitiveArray<T>) -> Result<BooleanArray, ArrowError>
where
- T: ArrowPrimitiveType + ArrowNumericType,
+ T: ArrowPrimitiveType + ArrowPrimitiveType,
{
let mut b = BooleanBuilder::with_capacity(from.len());
@@ -2403,9 +2404,9 @@ where
fn cast_bool_to_numeric<TO>(
from: &ArrayRef,
cast_options: &CastOptions,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
- TO: ArrowNumericType,
+ TO: ArrowPrimitiveType,
TO::Native: num::cast::NumCast,
{
Ok(Arc::new(bool_to_numeric_cast::<TO>(
@@ -2419,7 +2420,7 @@ fn bool_to_numeric_cast<T>(
_cast_options: &CastOptions,
) -> PrimitiveArray<T>
where
- T: ArrowNumericType,
+ T: ArrowPrimitiveType,
T::Native: num::NumCast,
{
let iter = (0..from.len()).map(|i| {
@@ -2447,7 +2448,7 @@ fn dictionary_cast<K: ArrowDictionaryKeyType>(
array: &ArrayRef,
to_type: &DataType,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
use DataType::*;
match to_type {
@@ -2525,7 +2526,7 @@ fn unpack_dictionary<K>(
array: &ArrayRef,
to_type: &DataType,
cast_options: &CastOptions,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
K: ArrowDictionaryKeyType,
{
@@ -2567,7 +2568,7 @@ fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
array: &ArrayRef,
dict_value_type: &DataType,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
use DataType::*;
match *dict_value_type {
@@ -2625,10 +2626,10 @@ fn pack_numeric_to_dictionary<K, V>(
array: &ArrayRef,
dict_value_type: &DataType,
cast_options: &CastOptions,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
K: ArrowDictionaryKeyType,
- V: ArrowNumericType,
+ V: ArrowPrimitiveType,
{
// attempt to cast the source array values to the target value type (the dictionary values type)
let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
@@ -2656,7 +2657,7 @@ where
fn pack_string_to_dictionary<K>(
array: &ArrayRef,
cast_options: &CastOptions,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
K: ArrowDictionaryKeyType,
{
@@ -2681,7 +2682,7 @@ fn cast_primitive_to_list<OffsetSize: OffsetSizeTrait + NumCast>(
to: &Field,
to_type: &DataType,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
// cast primitive to list's primitive
let cast_array = cast_with_options(array, to.data_type(), cast_options)?;
// create offsets, where if array.len() = 2, we have [0,1,2]
@@ -2721,7 +2722,7 @@ fn cast_list_inner<OffsetSize: OffsetSizeTrait>(
to: &Field,
to_type: &DataType,
cast_options: &CastOptions,
-) -> Result<ArrayRef> {
+) -> Result<ArrayRef, ArrowError> {
let data = array.data_ref();
let underlying_array = make_array(data.child_data()[0].clone());
let cast_array = cast_with_options(&underlying_array, to.data_type(), cast_options)?;
@@ -2745,7 +2746,9 @@ fn cast_list_inner<OffsetSize: OffsetSizeTrait>(
/// Helper function to cast from `Utf8` to `LargeUtf8` and vice versa. If the `LargeUtf8` is too large for
/// a `Utf8` array it will return an Error.
-fn cast_str_container<OffsetSizeFrom, OffsetSizeTo>(array: &dyn Array) -> Result<ArrayRef>
+fn cast_str_container<OffsetSizeFrom, OffsetSizeTo>(
+ array: &dyn Array,
+) -> Result<ArrayRef, ArrowError>
where
OffsetSizeFrom: OffsetSizeTrait + ToPrimitive,
OffsetSizeTo: OffsetSizeTrait + NumCast + ArrowNativeType,
@@ -2760,15 +2763,17 @@ where
let offsets = list_data.buffers()[0].typed_data::<OffsetSizeFrom>();
let mut offset_builder = BufferBuilder::<OffsetSizeTo>::new(offsets.len());
- offsets.iter().try_for_each::<_, Result<_>>(|offset| {
- let offset = OffsetSizeTo::from(*offset).ok_or_else(|| {
- ArrowError::ComputeError(
- "large-utf8 array too large to cast to utf8-array".into(),
- )
+ offsets
+ .iter()
+ .try_for_each::<_, Result<_, ArrowError>>(|offset| {
+ let offset = OffsetSizeTo::from(*offset).ok_or_else(|| {
+ ArrowError::ComputeError(
+ "large-utf8 array too large to cast to utf8-array".into(),
+ )
+ })?;
+ offset_builder.append(offset);
+ Ok(())
})?;
- offset_builder.append(offset);
- Ok(())
- })?;
let offset_buffer = offset_builder.finish();
@@ -2797,7 +2802,7 @@ where
fn cast_list_container<OffsetSizeFrom, OffsetSizeTo>(
array: &dyn Array,
_cast_options: &CastOptions,
-) -> Result<ArrayRef>
+) -> Result<ArrayRef, ArrowError>
where
OffsetSizeFrom: OffsetSizeTrait + ToPrimitive,
OffsetSizeTo: OffsetSizeTrait + NumCast,
@@ -2869,8 +2874,6 @@ where
#[cfg(test)]
mod tests {
use super::*;
- use crate::datatypes::TimeUnit;
- use crate::{buffer::Buffer, util::display::array_value_to_string};
macro_rules! generate_cast_test_case {
($INPUT_ARRAY: expr, $OUTPUT_TYPE_ARRAY: ident, $OUTPUT_TYPE: expr, $OUTPUT_VALUES: expr) => {
@@ -2901,7 +2904,7 @@ mod tests {
array: Vec<Option<i128>>,
precision: u8,
scale: u8,
- ) -> Result<Decimal128Array> {
+ ) -> Result<Decimal128Array, ArrowError> {
array
.into_iter()
.collect::<Decimal128Array>()
@@ -2912,7 +2915,7 @@ mod tests {
array: Vec<Option<i256>>,
precision: u8,
scale: u8,
- ) -> Result<Decimal256Array> {
+ ) -> Result<Decimal256Array, ArrowError> {
array
.into_iter()
.collect::<Decimal256Array>()
@@ -5169,7 +5172,7 @@ mod tests {
/// Convert `array` into a vector of strings by casting to data type dt
fn get_cast_values<T>(array: &ArrayRef, dt: &DataType) -> Vec<String>
where
- T: ArrowNumericType,
+ T: ArrowPrimitiveType,
{
let c = cast(array, dt).unwrap();
let a = c.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
diff --git a/arrow/src/util/display.rs b/arrow-cast/src/display.rs
similarity index 92%
rename from arrow/src/util/display.rs
rename to arrow-cast/src/display.rs
index f5bef1605..b29f844fb 100644
--- a/arrow/src/util/display.rs
+++ b/arrow-cast/src/display.rs
@@ -22,18 +22,11 @@
use std::fmt::Write;
use std::sync::Arc;
-use crate::array::Array;
-use crate::datatypes::{
- ArrowNativeType, ArrowPrimitiveType, DataType, Field, Int16Type, Int32Type,
- Int64Type, Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
- UnionMode,
-};
-use crate::{array, datatypes::IntervalUnit};
-
-use array::DictionaryArray;
-
-use crate::error::{ArrowError, Result};
use arrow_array::timezone::Tz;
+use arrow_array::types::*;
+use arrow_array::*;
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::*;
macro_rules! make_string {
($array_type:ty, $column: ident, $row: ident) => {{
@@ -254,7 +247,7 @@ macro_rules! make_string_from_list {
.value($row);
let string_values = (0..list.len())
.map(|i| array_value_to_string(&list.clone(), i))
- .collect::<Result<Vec<String>>>()?;
+ .collect::<Result<Vec<_>, _>>()?;
Ok(format!("[{}]", string_values.join(", ")))
}};
}
@@ -270,7 +263,7 @@ macro_rules! make_string_from_large_list {
.value($row);
let string_values = (0..list.len())
.map(|i| array_value_to_string(&list, i))
- .collect::<Result<Vec<String>>>()?;
+ .collect::<Result<Vec<_>, _>>()?;
Ok(format!("[{}]", string_values.join(", ")))
}};
}
@@ -286,17 +279,17 @@ macro_rules! make_string_from_fixed_size_list {
.value($row);
let string_values = (0..list.len())
.map(|i| array_value_to_string(&list.clone(), i))
- .collect::<Result<Vec<String>>>()?;
+ .collect::<Result<Vec<_>, _>>()?;
Ok(format!("[{}]", string_values.join(", ")))
}};
}
#[inline(always)]
-pub fn make_string_from_decimal(column: &Arc<dyn Array>, row: usize) -> Result<String> {
- let array = column
- .as_any()
- .downcast_ref::<array::Decimal128Array>()
- .unwrap();
+pub fn make_string_from_decimal(
+ column: &Arc<dyn Array>,
+ row: usize,
+) -> Result<String, ArrowError> {
+ let array = column.as_any().downcast_ref::<Decimal128Array>().unwrap();
Ok(array.value_as_string(row))
}
@@ -306,7 +299,7 @@ fn append_struct_field_string(
name: &str,
field_col: &Arc<dyn Array>,
row: usize,
-) -> Result<()> {
+) -> Result<(), ArrowError> {
target.push('"');
target.push_str(name);
target.push_str("\": ");
@@ -333,7 +326,10 @@ fn append_struct_field_string(
///
/// Note this function is quite inefficient and is unlikely to be
/// suitable for converting large arrays or record batches.
-pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<String> {
+pub fn array_value_to_string(
+ column: &ArrayRef,
+ row: usize,
+) -> Result<String, ArrowError> {
if column.is_null(row) {
return Ok("".to_string());
}
@@ -487,12 +483,12 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<Str
/// Converts the value of the union array at `row` to a String
fn union_to_string(
- column: &array::ArrayRef,
+ column: &ArrayRef,
row: usize,
fields: &[Field],
type_ids: &[i8],
mode: &UnionMode,
-) -> Result<String> {
+) -> Result<String, ArrowError> {
let list = column
.as_any()
.downcast_ref::<array::UnionArray>()
@@ -522,9 +518,9 @@ fn union_to_string(
}
/// Converts the value of the dictionary array at `row` to a String
fn dict_array_value_to_string<K: ArrowPrimitiveType>(
- colum: &array::ArrayRef,
+ colum: &ArrayRef,
row: usize,
-) -> Result<String> {
+) -> Result<String, ArrowError> {
let dict_array = colum.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
let keys_array = dict_array.keys();
@@ -533,13 +529,23 @@ fn dict_array_value_to_string<K: ArrowPrimitiveType>(
return Ok(String::from(""));
}
- let dict_index = keys_array.value(row).to_usize().ok_or_else(|| {
- ArrowError::InvalidArgumentError(format!(
- "Can not convert value {:?} at index {:?} to usize for string conversion.",
- keys_array.value(row),
- row
- ))
- })?;
-
+ let dict_index = keys_array.value(row).as_usize();
array_value_to_string(dict_array.values(), dict_index)
}
+
+/// Converts numeric type to a `String`
+pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String {
+ let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL);
+ unsafe {
+ // JUSTIFICATION
+ // Benefit
+ // Allows using the faster serializer lexical core and convert to string
+ // Soundness
+ // Length of buf is set as written length afterwards. lexical_core
+ // creates a valid string, so doesn't need to be checked.
+ let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity());
+ let len = lexical_core::write(n, slice).len();
+ buf.set_len(len);
+ String::from_utf8_unchecked(buf)
+ }
+}
diff --git a/arrow/src/util/serialization.rs b/arrow-cast/src/lib.rs
similarity index 52%
rename from arrow/src/util/serialization.rs
rename to arrow-cast/src/lib.rs
index 14d67ca11..397e5667e 100644
--- a/arrow/src/util/serialization.rs
+++ b/arrow-cast/src/lib.rs
@@ -15,19 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-/// Converts numeric type to a `String`
-pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String {
- let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL);
- unsafe {
- // JUSTIFICATION
- // Benefit
- // Allows using the faster serializer lexical core and convert to string
- // Soundness
- // Length of buf is set as written length afterwards. lexical_core
- // creates a valid string, so doesn't need to be checked.
- let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity());
- let len = lexical_core::write(n, slice).len();
- buf.set_len(len);
- String::from_utf8_unchecked(buf)
- }
-}
+//! Cast kernel for [Apache Arrow](https://docs.rs/arrow)
+
+pub mod cast;
+pub use cast::*;
+pub mod display;
+pub mod parse;
diff --git a/arrow/src/compute/kernels/cast_utils.rs b/arrow-cast/src/parse.rs
similarity index 87%
rename from arrow/src/compute/kernels/cast_utils.rs
rename to arrow-cast/src/parse.rs
index 718ea5ac6..8a9d34b4c 100644
--- a/arrow/src/compute/kernels/cast_utils.rs
+++ b/arrow-cast/src/parse.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use crate::error::{ArrowError, Result};
+use arrow_schema::ArrowError;
use chrono::prelude::*;
/// Accepts a string in RFC3339 / ISO8601 standard format and some
@@ -66,7 +66,7 @@ use chrono::prelude::*;
/// timestamp will be interpreted as though it were
/// `1997-01-31T09:26:56.123-05:00`
#[inline]
-pub fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
+pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
// Fast path: RFC3339 timestamp (with a T)
// Example: 2020-09-08T13:42:29.190855Z
if let Ok(ts) = DateTime::parse_from_rfc3339(s) {
@@ -135,52 +135,50 @@ mod tests {
use super::*;
#[test]
- fn string_to_timestamp_timezone() -> Result<()> {
+ fn string_to_timestamp_timezone() {
// Explicit timezone
assert_eq!(
1599572549190855000,
- parse_timestamp("2020-09-08T13:42:29.190855+00:00")?
+ parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
);
assert_eq!(
1599572549190855000,
- parse_timestamp("2020-09-08T13:42:29.190855Z")?
+ parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
);
assert_eq!(
1599572549000000000,
- parse_timestamp("2020-09-08T13:42:29Z")?
+ parse_timestamp("2020-09-08T13:42:29Z").unwrap()
); // no fractional part
assert_eq!(
1599590549190855000,
- parse_timestamp("2020-09-08T13:42:29.190855-05:00")?
+ parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
);
- Ok(())
}
#[test]
- fn string_to_timestamp_timezone_space() -> Result<()> {
+ fn string_to_timestamp_timezone_space() {
// Ensure space rather than T between time and date is accepted
assert_eq!(
1599572549190855000,
- parse_timestamp("2020-09-08 13:42:29.190855+00:00")?
+ parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
);
assert_eq!(
1599572549190855000,
- parse_timestamp("2020-09-08 13:42:29.190855Z")?
+ parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
);
assert_eq!(
1599572549000000000,
- parse_timestamp("2020-09-08 13:42:29Z")?
+ parse_timestamp("2020-09-08 13:42:29Z").unwrap()
); // no fractional part
assert_eq!(
1599590549190855000,
- parse_timestamp("2020-09-08 13:42:29.190855-05:00")?
+ parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
);
- Ok(())
}
#[test]
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
- fn string_to_timestamp_no_timezone() -> Result<()> {
+ fn string_to_timestamp_no_timezone() {
// This test is designed to succeed regardless of the local
// timezone the test machine is running in. Thus it is still
// somewhat susceptible to bugs in the use of chrono
@@ -192,12 +190,12 @@ mod tests {
// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime.timestamp_nanos(),
- parse_timestamp("2020-09-08T13:42:29.190855")?
+ parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
);
assert_eq!(
naive_datetime.timestamp_nanos(),
- parse_timestamp("2020-09-08 13:42:29.190855")?
+ parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
);
// Also ensure that parsing timestamps with no fractional
@@ -210,15 +208,13 @@ mod tests {
// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime_whole_secs.timestamp_nanos(),
- parse_timestamp("2020-09-08T13:42:29")?
+ parse_timestamp("2020-09-08T13:42:29").unwrap()
);
assert_eq!(
naive_datetime_whole_secs.timestamp_nanos(),
- parse_timestamp("2020-09-08 13:42:29")?
+ parse_timestamp("2020-09-08 13:42:29").unwrap()
);
-
- Ok(())
}
#[test]
@@ -235,7 +231,7 @@ mod tests {
}
// Parse a timestamp to timestamp int with a useful human readable error message
- fn parse_timestamp(s: &str) -> Result<i64> {
+ fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
let result = string_to_timestamp_nanos(s);
if let Err(e) = &result {
eprintln!("Error parsing timestamp '{}': {:?}", s, e);
@@ -258,7 +254,7 @@ mod tests {
}
#[test]
- fn string_without_timezone_to_timestamp() -> Result<()> {
+ fn string_without_timezone_to_timestamp() {
// string without timezone should always output the same regardless of the local or session timezone
let naive_datetime = NaiveDateTime::new(
@@ -269,12 +265,12 @@ mod tests {
// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime.timestamp_nanos(),
- parse_timestamp("2020-09-08T13:42:29.190855")?
+ parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
);
assert_eq!(
naive_datetime.timestamp_nanos(),
- parse_timestamp("2020-09-08 13:42:29.190855")?
+ parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
);
let naive_datetime = NaiveDateTime::new(
@@ -285,14 +281,12 @@ mod tests {
// Ensure both T and ' ' variants work
assert_eq!(
naive_datetime.timestamp_nanos(),
- parse_timestamp("2020-09-08T13:42:29")?
+ parse_timestamp("2020-09-08T13:42:29").unwrap()
);
assert_eq!(
naive_datetime.timestamp_nanos(),
- parse_timestamp("2020-09-08 13:42:29")?
+ parse_timestamp("2020-09-08 13:42:29").unwrap()
);
-
- Ok(())
}
}
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 5b2639b7f..5749f6799 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -45,6 +45,7 @@ ahash = { version = "0.8", default-features = false, features = ["runtime-rng"]
[dependencies]
arrow-buffer = { version = "26.0.0", path = "../arrow-buffer" }
+arrow-cast = { version = "26.0.0", path = "../arrow-cast" }
arrow-data = { version = "26.0.0", path = "../arrow-data" }
arrow-schema = { version = "26.0.0", path = "../arrow-schema" }
arrow-array = { version = "26.0.0", path = "../arrow-array" }
diff --git a/arrow/src/compute/kernels/mod.rs b/arrow/src/compute/kernels/mod.rs
index a772f5bcc..9ffa53eb2 100644
--- a/arrow/src/compute/kernels/mod.rs
+++ b/arrow/src/compute/kernels/mod.rs
@@ -22,8 +22,6 @@ pub mod arithmetic;
pub mod arity;
pub mod bitwise;
pub mod boolean;
-pub mod cast;
-pub mod cast_utils;
pub mod comparison;
pub mod concat_elements;
pub mod length;
@@ -36,4 +34,6 @@ pub mod temporal;
pub mod window;
pub mod zip;
+pub use arrow_cast::cast;
+pub use arrow_cast::parse as cast_utils;
pub use arrow_select::{concat, filter, interleave, take};
diff --git a/arrow/src/csv/writer.rs b/arrow/src/csv/writer.rs
index fb3348d94..b2d02fe84 100644
--- a/arrow/src/csv/writer.rs
+++ b/arrow/src/csv/writer.rs
@@ -67,12 +67,12 @@ use arrow_array::timezone::Tz;
use chrono::{DateTime, Utc};
use std::io::Write;
+use crate::array::*;
use crate::csv::map_csv_error;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::record_batch::RecordBatch;
-use crate::util::display::make_string_from_decimal;
-use crate::{array::*, util::serialization::lexical_to_string};
+use crate::util::display::{lexical_to_string, make_string_from_decimal};
const DEFAULT_DATE_FORMAT: &str = "%F";
const DEFAULT_TIME_FORMAT: &str = "%T";
diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs
index 7089c7152..0081856f3 100644
--- a/arrow/src/lib.rs
+++ b/arrow/src/lib.rs
@@ -33,6 +33,7 @@
//!
//! * [`arrow-array`][arrow_array] - type-safe arrow array abstractions
//! * [`arrow-buffer`][arrow_buffer] - buffer abstractions for arrow arrays
+//! * [`arrow-cast`][arrow_cast] - cast kernels for arrow arrays
//! * [`arrow-data`][arrow_data] - the underlying data of arrow arrays
//! * [`arrow-schema`][arrow_schema] - the logical types for arrow arrays
//! * [`arrow-select`][arrow_select] - selection kernels for arrow arrays
diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs
index f0b9e0076..9a0ca852a 100644
--- a/arrow/src/util/mod.rs
+++ b/arrow/src/util/mod.rs
@@ -24,12 +24,11 @@ pub use arrow_data::bit_mask;
pub mod bench_util;
#[cfg(feature = "test_utils")]
pub mod data_gen;
-pub mod display;
#[cfg(feature = "prettyprint")]
pub mod pretty;
-pub(crate) mod serialization;
pub mod string_writer;
#[cfg(any(test, feature = "test_utils"))]
pub mod test_util;
+pub use arrow_cast::display;
pub(crate) mod reader_parser;
diff --git a/arrow/src/util/reader_parser.rs b/arrow/src/util/reader_parser.rs
index 60082e8dd..efee62905 100644
--- a/arrow/src/util/reader_parser.rs
+++ b/arrow/src/util/reader_parser.rs
@@ -15,8 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-use crate::compute::kernels::cast_utils::string_to_timestamp_nanos;
-use crate::datatypes::*;
+use arrow_array::types::*;
+use arrow_array::*;
+use arrow_cast::parse::string_to_timestamp_nanos;
/// Specialized parsing implementations
/// used by csv and json reader