You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/10 21:28:47 UTC
[arrow-rs] branch master updated: Update parquet to depend on arrow subcrates (#3028)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 4dd7fea13 Update parquet to depend on arrow subcrates (#3028)
4dd7fea13 is described below
commit 4dd7fea13dd2ac62f179fc7fc245037e7036afc9
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Fri Nov 11 10:28:42 2022 +1300
Update parquet to depend on arrow subcrates (#3028)
* Update parquet to depend on arrow subcrates (#3044)
* Fix parquet_derive
* Fix parquet_derive
* Fix no-default-features test compilation
* Fix parquet-fromcsv
* Clippy
---
parquet/Cargo.toml | 16 ++++++--
parquet/src/arrow/array_reader/builder.rs | 2 +-
parquet/src/arrow/array_reader/byte_array.rs | 8 ++--
.../arrow/array_reader/byte_array_dictionary.rs | 25 ++++--------
parquet/src/arrow/array_reader/empty_array.rs | 5 ++-
.../src/arrow/array_reader/fixed_len_byte_array.rs | 15 +++----
parquet/src/arrow/array_reader/list_array.rs | 16 ++++----
parquet/src/arrow/array_reader/map_array.rs | 16 ++++----
parquet/src/arrow/array_reader/mod.rs | 4 +-
parquet/src/arrow/array_reader/null_array.rs | 8 ++--
parquet/src/arrow/array_reader/primitive_array.rs | 21 +++++-----
parquet/src/arrow/array_reader/struct_array.rs | 9 ++---
parquet/src/arrow/array_reader/test_util.rs | 4 +-
parquet/src/arrow/arrow_reader/filter.rs | 13 +++---
parquet/src/arrow/arrow_reader/mod.rs | 43 ++++++++++----------
parquet/src/arrow/arrow_reader/selection.rs | 4 +-
parquet/src/arrow/arrow_writer/byte_array.rs | 6 +--
parquet/src/arrow/arrow_writer/levels.rs | 47 +++++++++++-----------
parquet/src/arrow/arrow_writer/mod.rs | 21 +++++-----
parquet/src/arrow/async_reader.rs | 8 ++--
parquet/src/arrow/buffer/bit_util.rs | 4 +-
parquet/src/arrow/buffer/dictionary_buffer.rs | 11 ++---
parquet/src/arrow/buffer/offset_buffer.rs | 9 +++--
parquet/src/arrow/mod.rs | 12 +++---
parquet/src/arrow/record_reader/buffer.rs | 3 +-
.../src/arrow/record_reader/definition_levels.rs | 8 ++--
parquet/src/arrow/record_reader/mod.rs | 6 +--
parquet/src/arrow/schema.rs | 10 ++---
parquet/src/arrow/schema/complex.rs | 2 +-
parquet/src/arrow/schema/primitive.rs | 2 +-
parquet/src/bin/parquet-fromcsv.rs | 3 +-
parquet/src/column/reader.rs | 1 +
parquet/src/column/writer/encoder.rs | 6 +--
parquet/src/compression.rs | 4 +-
parquet/src/errors.rs | 16 ++++----
parquet/src/file/serialized_reader.rs | 1 +
parquet/src/lib.rs | 2 +-
parquet/src/util/interner.rs | 1 +
parquet_derive/Cargo.toml | 6 +--
39 files changed, 199 insertions(+), 199 deletions(-)
diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index a414c1666..65c4009d3 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -30,6 +30,15 @@ edition = "2021"
rust-version = "1.62"
[dependencies]
+arrow-array = { version = "26.0.0", path = "../arrow-array", default-features = false, optional = true }
+arrow-buffer = { version = "26.0.0", path = "../arrow-buffer", default-features = false, optional = true }
+arrow-cast = { version = "26.0.0", path = "../arrow-cast", default-features = false, optional = true }
+arrow-csv = { version = "26.0.0", path = "../arrow-csv", default-features = false, optional = true }
+arrow-data = { version = "26.0.0", path = "../arrow-data", default-features = false, optional = true }
+arrow-schema = { version = "26.0.0", path = "../arrow-schema", default-features = false, optional = true }
+arrow-select = { version = "26.0.0", path = "../arrow-select", default-features = false, optional = true }
+arrow-ipc = { version = "26.0.0", path = "../arrow-ipc", default-features = false, optional = true }
+
ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] }
bytes = { version = "1.1", default-features = false, features = ["std"] }
thrift = { version = "0.16", default-features = false }
@@ -41,7 +50,6 @@ zstd = { version = "0.11.1", optional = true, default-features = false }
chrono = { version = "0.4", default-features = false, features = ["alloc"] }
num = { version = "0.4", default-features = false }
num-bigint = { version = "0.4", default-features = false }
-arrow = { path = "../arrow", version = "26.0.0", optional = true, default-features = false, features = ["ipc"] }
base64 = { version = "0.13", default-features = false, features = ["std"], optional = true }
clap = { version = "4", default-features = false, features = ["std", "derive", "env", "help", "error-context", "usage"], optional = true }
serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
@@ -70,9 +78,9 @@ all-features = true
[features]
default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"]
# Enable arrow reader/writer APIs
-arrow = ["dep:arrow", "base64"]
+arrow = ["base64", "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", "arrow-schema", "arrow-select", "arrow-ipc"]
# Enable CLI tools
-cli = ["json", "base64", "clap", "arrow/csv"]
+cli = ["json", "base64", "clap", "arrow-csv"]
# Enable JSON APIs
json = ["serde_json", "base64"]
# Enable internal testing APIs
@@ -100,7 +108,7 @@ required-features = ["cli"]
[[bin]]
name = "parquet-fromcsv"
-required-features = ["cli"]
+required-features = ["arrow", "cli"]
[[bench]]
name = "arrow_writer"
diff --git a/parquet/src/arrow/array_reader/builder.rs b/parquet/src/arrow/array_reader/builder.rs
index c0216466d..246bccfec 100644
--- a/parquet/src/arrow/array_reader/builder.rs
+++ b/parquet/src/arrow/array_reader/builder.rs
@@ -17,7 +17,7 @@
use std::sync::Arc;
-use arrow::datatypes::DataType;
+use arrow_schema::DataType;
use crate::arrow::array_reader::empty_array::make_empty_array_reader;
use crate::arrow::array_reader::fixed_len_byte_array::make_fixed_len_byte_array_reader;
diff --git a/parquet/src/arrow/array_reader/byte_array.rs b/parquet/src/arrow/array_reader/byte_array.rs
index 4bf4dee0d..22fa0ab45 100644
--- a/parquet/src/arrow/array_reader/byte_array.rs
+++ b/parquet/src/arrow/array_reader/byte_array.rs
@@ -30,9 +30,9 @@ use crate::encodings::decoding::{Decoder, DeltaBitPackDecoder};
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
use crate::util::memory::ByteBufferPtr;
-use arrow::array::{Array, ArrayRef, BinaryArray, Decimal128Array, OffsetSizeTrait};
-use arrow::buffer::Buffer;
-use arrow::datatypes::DataType as ArrowType;
+use arrow_array::{Array, ArrayRef, BinaryArray, Decimal128Array, OffsetSizeTrait};
+use arrow_buffer::Buffer;
+use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::ops::Range;
use std::sync::Arc;
@@ -587,7 +587,7 @@ mod tests {
use super::*;
use crate::arrow::array_reader::test_util::{byte_array_all_encodings, utf8_column};
use crate::arrow::record_reader::buffer::ValuesBuffer;
- use arrow::array::{Array, StringArray};
+ use arrow_array::{Array, StringArray};
#[test]
fn test_byte_array_decoder() {
diff --git a/parquet/src/arrow/array_reader/byte_array_dictionary.rs b/parquet/src/arrow/array_reader/byte_array_dictionary.rs
index 0a5d94fa6..c4ed7e907 100644
--- a/parquet/src/arrow/array_reader/byte_array_dictionary.rs
+++ b/parquet/src/arrow/array_reader/byte_array_dictionary.rs
@@ -20,9 +20,9 @@ use std::marker::PhantomData;
use std::ops::Range;
use std::sync::Arc;
-use arrow::array::{Array, ArrayRef, OffsetSizeTrait};
-use arrow::buffer::Buffer;
-use arrow::datatypes::{ArrowNativeType, DataType as ArrowType};
+use arrow_array::{Array, ArrayRef, OffsetSizeTrait};
+use arrow_buffer::{ArrowNativeType, Buffer};
+use arrow_schema::DataType as ArrowType;
use crate::arrow::array_reader::byte_array::{ByteArrayDecoder, ByteArrayDecoderPlain};
use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
@@ -188,15 +188,11 @@ where
}
fn get_def_levels(&self) -> Option<&[i16]> {
- self.def_levels_buffer
- .as_ref()
- .map(|buf| buf.typed_data())
+ self.def_levels_buffer.as_ref().map(|buf| buf.typed_data())
}
fn get_rep_levels(&self) -> Option<&[i16]> {
- self.rep_levels_buffer
- .as_ref()
- .map(|buf| buf.typed_data())
+ self.rep_levels_buffer.as_ref().map(|buf| buf.typed_data())
}
}
@@ -395,7 +391,7 @@ where
#[cfg(test)]
mod tests {
- use arrow::array::{Array, StringArray};
+ use arrow_array::{Array, StringArray};
use arrow::compute::cast;
use crate::arrow::array_reader::test_util::{
@@ -528,13 +524,7 @@ mod tests {
assert_eq!(
strings.iter().collect::<Vec<_>>(),
- vec![
- Some("0"),
- Some("1"),
- Some("1"),
- Some("2"),
- Some("2"),
- ]
+ vec![Some("0"), Some("1"), Some("1"), Some("2"), Some("2"),]
)
}
@@ -625,7 +615,6 @@ mod tests {
}
}
-
#[test]
fn test_too_large_dictionary() {
let data: Vec<_> = (0..128)
diff --git a/parquet/src/arrow/array_reader/empty_array.rs b/parquet/src/arrow/array_reader/empty_array.rs
index abe839b9d..2a3711fa0 100644
--- a/parquet/src/arrow/array_reader/empty_array.rs
+++ b/parquet/src/arrow/array_reader/empty_array.rs
@@ -17,8 +17,9 @@
use crate::arrow::array_reader::ArrayReader;
use crate::errors::Result;
-use arrow::array::{ArrayDataBuilder, ArrayRef, StructArray};
-use arrow::datatypes::DataType as ArrowType;
+use arrow_schema::DataType as ArrowType;
+use arrow_array::{ArrayRef, StructArray};
+use arrow_data::ArrayDataBuilder;
use std::any::Any;
use std::sync::Arc;
diff --git a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
index ba3a02c4f..e8d426d3a 100644
--- a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
+++ b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
@@ -27,12 +27,13 @@ use crate::column::reader::decoder::{ColumnValueDecoder, ValuesBufferSlice};
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
use crate::util::memory::ByteBufferPtr;
-use arrow::array::{
- ArrayDataBuilder, ArrayRef, Decimal128Array, FixedSizeBinaryArray,
- IntervalDayTimeArray, IntervalYearMonthArray,
+use arrow_array::{
+ ArrayRef, Decimal128Array, FixedSizeBinaryArray, IntervalDayTimeArray,
+ IntervalYearMonthArray,
};
-use arrow::buffer::Buffer;
-use arrow::datatypes::{DataType as ArrowType, IntervalUnit};
+use arrow_buffer::Buffer;
+use arrow_data::ArrayDataBuilder;
+use arrow_schema::{DataType as ArrowType, IntervalUnit};
use std::any::Any;
use std::ops::Range;
use std::sync::Arc;
@@ -427,10 +428,10 @@ mod tests {
use super::*;
use crate::arrow::arrow_reader::ParquetRecordBatchReader;
use crate::arrow::ArrowWriter;
- use arrow::array::{Array, Decimal128Array, ListArray};
+ use arrow_array::{Array, Decimal128Array, ListArray};
use arrow::datatypes::Field;
use arrow::error::Result as ArrowResult;
- use arrow::record_batch::RecordBatch;
+ use arrow_array::RecordBatch;
use bytes::Bytes;
use std::sync::Arc;
diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs
index f0b5092e1..965142f38 100644
--- a/parquet/src/arrow/array_reader/list_array.rs
+++ b/parquet/src/arrow/array_reader/list_array.rs
@@ -18,13 +18,14 @@
use crate::arrow::array_reader::ArrayReader;
use crate::errors::ParquetError;
use crate::errors::Result;
-use arrow::array::{
- new_empty_array, Array, ArrayData, ArrayRef, BooleanBufferBuilder, GenericListArray,
- MutableArrayData, OffsetSizeTrait,
+use arrow_array::{
+ builder::BooleanBufferBuilder, new_empty_array, Array, ArrayRef, GenericListArray,
+ OffsetSizeTrait,
};
-use arrow::buffer::Buffer;
-use arrow::datatypes::DataType as ArrowType;
-use arrow::datatypes::ToByteSlice;
+use arrow_buffer::Buffer;
+use arrow_buffer::ToByteSlice;
+use arrow_data::{transform::MutableArrayData, ArrayData};
+use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::cmp::Ordering;
use std::marker::PhantomData;
@@ -257,8 +258,9 @@ mod tests {
use crate::file::reader::{FileReader, SerializedFileReader};
use crate::schema::parser::parse_message_type;
use crate::schema::types::SchemaDescriptor;
- use arrow::array::{Array, ArrayDataBuilder, PrimitiveArray};
use arrow::datatypes::{Field, Int32Type as ArrowInt32, Int32Type};
+ use arrow_array::{Array, PrimitiveArray};
+ use arrow_data::ArrayDataBuilder;
use std::sync::Arc;
fn list_type<OffsetSize: OffsetSizeTrait>(
diff --git a/parquet/src/arrow/array_reader/map_array.rs b/parquet/src/arrow/array_reader/map_array.rs
index bb80fdbdc..cd1a76e86 100644
--- a/parquet/src/arrow/array_reader/map_array.rs
+++ b/parquet/src/arrow/array_reader/map_array.rs
@@ -17,8 +17,8 @@
use crate::arrow::array_reader::{ArrayReader, ListArrayReader, StructArrayReader};
use crate::errors::Result;
-use arrow::array::{Array, ArrayRef, MapArray};
-use arrow::datatypes::DataType as ArrowType;
+use arrow_array::{Array, ArrayRef, MapArray};
+use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::sync::Arc;
@@ -125,10 +125,10 @@ mod tests {
use super::*;
use crate::arrow::arrow_reader::ParquetRecordBatchReader;
use crate::arrow::ArrowWriter;
- use arrow::array;
- use arrow::array::{MapBuilder, PrimitiveBuilder, StringBuilder};
use arrow::datatypes::{Field, Int32Type, Schema};
- use arrow::record_batch::RecordBatch;
+ use arrow_array::builder::{MapBuilder, PrimitiveBuilder, StringBuilder};
+ use arrow_array::cast::*;
+ use arrow_array::RecordBatch;
use bytes::Bytes;
#[test]
@@ -203,9 +203,9 @@ mod tests {
let col = record_batch.column(0);
assert!(col.is_null(0));
assert!(col.is_null(1));
- let map_entry = array::as_map_array(col).value(2);
- let struct_col = array::as_struct_array(&map_entry);
- let key_col = array::as_string_array(struct_col.column(0)); // Key column
+ let map_entry = as_map_array(col).value(2);
+ let struct_col = as_struct_array(&map_entry);
+ let key_col = as_string_array(struct_col.column(0)); // Key column
assert_eq!(key_col.value(0), "three");
assert_eq!(key_col.value(1), "four");
assert_eq!(key_col.value(2), "five");
diff --git a/parquet/src/arrow/array_reader/mod.rs b/parquet/src/arrow/array_reader/mod.rs
index 3740f0fae..aede5e86c 100644
--- a/parquet/src/arrow/array_reader/mod.rs
+++ b/parquet/src/arrow/array_reader/mod.rs
@@ -18,8 +18,8 @@
//! Logic for reading into arrow arrays
use crate::errors::Result;
-use arrow::array::ArrayRef;
-use arrow::datatypes::DataType as ArrowType;
+use arrow_array::ArrayRef;
+use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::sync::Arc;
diff --git a/parquet/src/arrow/array_reader/null_array.rs b/parquet/src/arrow/array_reader/null_array.rs
index 405633f0a..4ad6c97e2 100644
--- a/parquet/src/arrow/array_reader/null_array.rs
+++ b/parquet/src/arrow/array_reader/null_array.rs
@@ -22,9 +22,9 @@ use crate::column::page::PageIterator;
use crate::data_type::DataType;
use crate::errors::Result;
use crate::schema::types::ColumnDescPtr;
-use arrow::array::ArrayRef;
-use arrow::buffer::Buffer;
-use arrow::datatypes::DataType as ArrowType;
+use arrow_array::ArrayRef;
+use arrow_buffer::Buffer;
+use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::sync::Arc;
@@ -82,7 +82,7 @@ where
fn consume_batch(&mut self) -> Result<ArrayRef> {
// convert to arrays
- let array = arrow::array::NullArray::new(self.record_reader.num_values());
+ let array = arrow_array::NullArray::new(self.record_reader.num_values());
// save definition and repetition buffers
self.def_levels_buffer = self.record_reader.consume_def_levels();
diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs
index 5fc5e639d..012cad5c4 100644
--- a/parquet/src/arrow/array_reader/primitive_array.rs
+++ b/parquet/src/arrow/array_reader/primitive_array.rs
@@ -24,13 +24,14 @@ use crate::column::page::PageIterator;
use crate::data_type::{DataType, Int96};
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
-use arrow::array::{
- ArrayDataBuilder, ArrayRef, BooleanArray, BooleanBufferBuilder, Decimal128Array,
- Float32Array, Float64Array, Int32Array, Int64Array, TimestampNanosecondArray,
- TimestampNanosecondBufferBuilder, UInt32Array, UInt64Array,
+use arrow_array::{
+ builder::{BooleanBufferBuilder, TimestampNanosecondBufferBuilder},
+ ArrayRef, BooleanArray, Decimal128Array, Float32Array, Float64Array, Int32Array,
+ Int64Array, TimestampNanosecondArray, UInt32Array, UInt64Array,
};
-use arrow::buffer::Buffer;
-use arrow::datatypes::{DataType as ArrowType, TimeUnit};
+use arrow_buffer::Buffer;
+use arrow_data::ArrayDataBuilder;
+use arrow_schema::{DataType as ArrowType, TimeUnit};
use std::any::Any;
use std::sync::Arc;
@@ -205,8 +206,8 @@ where
let array = match target_type {
ArrowType::Date64 => {
// this is cheap as it internally reinterprets the data
- let a = arrow::compute::cast(&array, &ArrowType::Date32)?;
- arrow::compute::cast(&a, target_type)?
+ let a = arrow_cast::cast(&array, &ArrowType::Date32)?;
+ arrow_cast::cast(&a, target_type)?
}
ArrowType::Decimal128(p, s) => {
let array = match array.data_type() {
@@ -236,7 +237,7 @@ where
Arc::new(array) as ArrayRef
}
- _ => arrow::compute::cast(&array, target_type)?,
+ _ => arrow_cast::cast(&array, target_type)?,
};
// save definition and repetition buffers
@@ -270,8 +271,8 @@ mod tests {
use crate::schema::types::SchemaDescriptor;
use crate::util::test_common::rand_gen::make_pages;
use crate::util::InMemoryPageIterator;
- use arrow::array::{Array, PrimitiveArray};
use arrow::datatypes::ArrowPrimitiveType;
+ use arrow_array::{Array, PrimitiveArray};
use arrow::datatypes::DataType::Decimal128;
use rand::distributions::uniform::SampleUniform;
diff --git a/parquet/src/arrow/array_reader/struct_array.rs b/parquet/src/arrow/array_reader/struct_array.rs
index f682f146c..b470be5ad 100644
--- a/parquet/src/arrow/array_reader/struct_array.rs
+++ b/parquet/src/arrow/array_reader/struct_array.rs
@@ -17,10 +17,9 @@
use crate::arrow::array_reader::ArrayReader;
use crate::errors::{ParquetError, Result};
-use arrow::array::{
- ArrayData, ArrayDataBuilder, ArrayRef, BooleanBufferBuilder, StructArray,
-};
-use arrow::datatypes::DataType as ArrowType;
+use arrow_array::{builder::BooleanBufferBuilder, ArrayRef, StructArray};
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::sync::Arc;
@@ -216,9 +215,9 @@ mod tests {
use super::*;
use crate::arrow::array_reader::test_util::InMemoryArrayReader;
use crate::arrow::array_reader::ListArrayReader;
- use arrow::array::{Array, Int32Array, ListArray};
use arrow::buffer::Buffer;
use arrow::datatypes::Field;
+ use arrow_array::{Array, Int32Array, ListArray};
#[test]
fn test_struct_array_reader() {
diff --git a/parquet/src/arrow/array_reader/test_util.rs b/parquet/src/arrow/array_reader/test_util.rs
index ca1aabfd4..6585d4614 100644
--- a/parquet/src/arrow/array_reader/test_util.rs
+++ b/parquet/src/arrow/array_reader/test_util.rs
@@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::array::{Array, ArrayRef};
-use arrow::datatypes::DataType as ArrowType;
+use arrow_array::{Array, ArrayRef};
+use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::sync::Arc;
diff --git a/parquet/src/arrow/arrow_reader/filter.rs b/parquet/src/arrow/arrow_reader/filter.rs
index 8945ccde4..cbded9a6f 100644
--- a/parquet/src/arrow/arrow_reader/filter.rs
+++ b/parquet/src/arrow/arrow_reader/filter.rs
@@ -16,9 +16,8 @@
// under the License.
use crate::arrow::ProjectionMask;
-use arrow::array::BooleanArray;
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
+use arrow_array::{BooleanArray, RecordBatch};
+use arrow_schema::ArrowError;
/// A predicate operating on [`RecordBatch`]
pub trait ArrowPredicate: Send + 'static {
@@ -32,7 +31,7 @@ pub trait ArrowPredicate: Send + 'static {
///
/// Rows that are `true` in the returned [`BooleanArray`] will be returned by the
/// parquet reader, whereas rows that are `false` or `Null` will not be
- fn evaluate(&mut self, batch: RecordBatch) -> ArrowResult<BooleanArray>;
+ fn evaluate(&mut self, batch: RecordBatch) -> Result<BooleanArray, ArrowError>;
}
/// An [`ArrowPredicate`] created from an [`FnMut`]
@@ -43,7 +42,7 @@ pub struct ArrowPredicateFn<F> {
impl<F> ArrowPredicateFn<F>
where
- F: FnMut(RecordBatch) -> ArrowResult<BooleanArray> + Send + 'static,
+ F: FnMut(RecordBatch) -> Result<BooleanArray, ArrowError> + Send + 'static,
{
/// Create a new [`ArrowPredicateFn`]. `f` will be passed batches
/// that contains the columns specified in `projection`
@@ -56,13 +55,13 @@ where
impl<F> ArrowPredicate for ArrowPredicateFn<F>
where
- F: FnMut(RecordBatch) -> ArrowResult<BooleanArray> + Send + 'static,
+ F: FnMut(RecordBatch) -> Result<BooleanArray, ArrowError> + Send + 'static,
{
fn projection(&self) -> &ProjectionMask {
&self.projection
}
- fn evaluate(&mut self, batch: RecordBatch) -> ArrowResult<BooleanArray> {
+ fn evaluate(&mut self, batch: RecordBatch) -> Result<BooleanArray, ArrowError> {
(self.f)(batch)
}
}
diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 19c877dff..35b70a048 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -20,12 +20,10 @@
use std::collections::VecDeque;
use std::sync::Arc;
-use arrow::array::Array;
-use arrow::compute::prep_null_mask_filter;
-use arrow::datatypes::{DataType as ArrowType, Schema, SchemaRef};
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::{RecordBatch, RecordBatchReader};
-use arrow::{array::StructArray, error::ArrowError};
+use arrow_array::{Array, StructArray};
+use arrow_array::{RecordBatch, RecordBatchReader};
+use arrow_schema::{ArrowError, DataType as ArrowType, Schema, SchemaRef};
+use arrow_select::filter::prep_null_mask_filter;
use crate::arrow::array_reader::{
build_array_reader, ArrayReader, FileReaderRowGroupCollection, RowGroupCollection,
@@ -473,7 +471,7 @@ pub struct ParquetRecordBatchReader {
}
impl Iterator for ParquetRecordBatchReader {
- type Item = ArrowResult<RecordBatch>;
+ type Item = Result<RecordBatch, ArrowError>;
fn next(&mut self) -> Option<Self::Item> {
let mut read_records = 0;
@@ -638,11 +636,12 @@ mod tests {
use rand::{thread_rng, Rng, RngCore};
use tempfile::tempfile;
- use arrow::array::*;
- use arrow::buffer::Buffer;
- use arrow::datatypes::{DataType as ArrowDataType, Field, Schema};
- use arrow::error::Result as ArrowResult;
- use arrow::record_batch::{RecordBatch, RecordBatchReader};
+ use arrow_array::builder::*;
+ use arrow_array::*;
+ use arrow_array::{RecordBatch, RecordBatchReader};
+ use arrow_buffer::Buffer;
+ use arrow_data::ArrayDataBuilder;
+ use arrow_schema::{DataType as ArrowDataType, Field, Schema};
use crate::arrow::arrow_reader::{
ArrowPredicateFn, ArrowReaderOptions, ParquetRecordBatchReader,
@@ -714,7 +713,7 @@ mod tests {
file.rewind().unwrap();
let record_reader = ParquetRecordBatchReader::try_new(file, 2).unwrap();
- let batches = record_reader.collect::<ArrowResult<Vec<_>>>().unwrap();
+ let batches = record_reader.collect::<Result<Vec<_>, _>>().unwrap();
assert_eq!(batches.len(), 4);
for batch in &batches[0..3] {
@@ -1067,7 +1066,7 @@ mod tests {
let read = ParquetRecordBatchReader::try_new(Bytes::from(buffer), 3)
.unwrap()
- .collect::<ArrowResult<Vec<_>>>()
+ .collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(&written.slice(0, 3), &read[0]);
@@ -1103,7 +1102,7 @@ mod tests {
let read = ParquetRecordBatchReader::try_new(Bytes::from(buffer), 3)
.unwrap()
- .collect::<ArrowResult<Vec<_>>>()
+ .collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(&written.slice(0, 3), &read[0]);
@@ -1143,7 +1142,7 @@ mod tests {
let read = ParquetRecordBatchReader::try_new(Bytes::from(buffer), 3)
.unwrap()
- .collect::<ArrowResult<Vec<_>>>()
+ .collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(&written.slice(0, 3), &read[0]);
@@ -1153,7 +1152,7 @@ mod tests {
#[test]
fn test_read_decimal_file() {
- use arrow::array::Decimal128Array;
+ use arrow_array::Decimal128Array;
let testdata = arrow::util::test_util::parquet_test_data();
let file_variants = vec![
("byte_array", 4),
@@ -1936,7 +1935,7 @@ mod tests {
let record_reader = ParquetRecordBatchReader::try_new(file, 3).unwrap();
let batches = record_reader
- .collect::<ArrowResult<Vec<RecordBatch>>>()
+ .collect::<Result<Vec<RecordBatch>, _>>()
.unwrap();
assert_eq!(batches.len(), 6);
@@ -2271,7 +2270,7 @@ mod tests {
let expected = get_expected_batches(&data, &selections, batch_size);
let skip_reader = create_skip_reader(&test_file, batch_size, selections);
assert_eq!(
- skip_reader.collect::<ArrowResult<Vec<_>>>().unwrap(),
+ skip_reader.collect::<Result<Vec<_>, _>>().unwrap(),
expected,
"batch_size: {}, selection_len: {}, skip_first: {}",
batch_size,
@@ -2399,7 +2398,7 @@ mod tests {
let batches = ParquetRecordBatchReader::try_new(file, 1024)
.unwrap()
- .collect::<ArrowResult<Vec<_>>>()
+ .collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(batches.len(), 1);
let batch = &batches[0];
@@ -2444,7 +2443,7 @@ mod tests {
let batches = ParquetRecordBatchReader::try_new(file, expected_rows)
.unwrap()
- .collect::<ArrowResult<Vec<_>>>()
+ .collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(batches.len(), 1);
let batch = &batches[0];
@@ -2476,7 +2475,7 @@ mod tests {
let batches = ParquetRecordBatchReader::try_new(file, expected_rows)
.unwrap()
- .collect::<ArrowResult<Vec<_>>>()
+ .collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(batches.len(), 1);
let batch = &batches[0];
diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs
index 2328c4501..357960906 100644
--- a/parquet/src/arrow/arrow_reader/selection.rs
+++ b/parquet/src/arrow/arrow_reader/selection.rs
@@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::array::{Array, BooleanArray};
-use arrow::compute::SlicesIterator;
+use arrow_array::{Array, BooleanArray};
+use arrow_select::filter::SlicesIterator;
use std::cmp::Ordering;
use std::collections::VecDeque;
use std::ops::Range;
diff --git a/parquet/src/arrow/arrow_writer/byte_array.rs b/parquet/src/arrow/arrow_writer/byte_array.rs
index 7070cecac..d52317852 100644
--- a/parquet/src/arrow/arrow_writer/byte_array.rs
+++ b/parquet/src/arrow/arrow_writer/byte_array.rs
@@ -31,17 +31,17 @@ use crate::file::writer::OnCloseColumnChunk;
use crate::schema::types::ColumnDescPtr;
use crate::util::bit_util::num_required_bits;
use crate::util::interner::{Interner, Storage};
-use arrow::array::{
+use arrow_array::{
Array, ArrayAccessor, ArrayRef, BinaryArray, DictionaryArray, LargeBinaryArray,
LargeStringArray, StringArray,
};
-use arrow::datatypes::DataType;
+use arrow_schema::DataType;
macro_rules! downcast_dict_impl {
($array:ident, $key:ident, $val:ident, $op:expr $(, $arg:expr)*) => {{
$op($array
.as_any()
- .downcast_ref::<DictionaryArray<arrow::datatypes::$key>>()
+ .downcast_ref::<DictionaryArray<arrow_array::types::$key>>()
.unwrap()
.downcast_dict::<$val>()
.unwrap()$(, $arg)*)
diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index 5736f05fd..e2a8a8c50 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -41,11 +41,11 @@
//! \[1\] [parquet-format#nested-encoding](https://github.com/apache/parquet-format#nested-encoding)
use crate::errors::{ParquetError, Result};
-use arrow::array::{
- make_array, Array, ArrayData, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait,
- StructArray,
+use arrow_array::{
+ make_array, Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, StructArray,
};
-use arrow::datatypes::{DataType, Field};
+use arrow_data::ArrayData;
+use arrow_schema::{DataType, Field};
use std::ops::Range;
/// Performs a depth-first scan of the children of `array`, constructing [`LevelInfo`]
@@ -482,11 +482,13 @@ mod tests {
use std::sync::Arc;
- use arrow::array::*;
- use arrow::buffer::Buffer;
- use arrow::datatypes::{Int32Type, Schema, ToByteSlice};
- use arrow::record_batch::RecordBatch;
- use arrow::util::pretty::pretty_format_columns;
+ use arrow_array::builder::*;
+ use arrow_array::types::Int32Type;
+ use arrow_array::*;
+ use arrow_buffer::{Buffer, ToByteSlice};
+ use arrow_cast::display::array_value_to_string;
+ use arrow_data::ArrayDataBuilder;
+ use arrow_schema::Schema;
#[test]
fn test_calculate_array_levels_twitter_example() {
@@ -1355,21 +1357,18 @@ mod tests {
let list_field = Field::new("col", list_type, true);
let expected = vec![
- r#"+-------------------------------------+"#,
- r#"| col |"#,
- r#"+-------------------------------------+"#,
- r#"| |"#,
- r#"| |"#,
- r#"| [] |"#,
- r#"| [{"list": [3, ], "integers": null}] |"#,
- r#"| [, {"list": null, "integers": 5}] |"#,
- r#"| [] |"#,
- r#"+-------------------------------------+"#,
- ]
- .join("\n");
-
- let pretty = pretty_format_columns(list_field.name(), &[list.clone()]).unwrap();
- assert_eq!(pretty.to_string(), expected);
+ r#""#.to_string(),
+ r#""#.to_string(),
+ r#"[]"#.to_string(),
+ r#"[{"list": [3, ], "integers": null}]"#.to_string(),
+ r#"[, {"list": null, "integers": 5}]"#.to_string(),
+ r#"[]"#.to_string(),
+ ];
+
+ let actual: Vec<_> = (0..6)
+ .map(|x| array_value_to_string(&list, x).unwrap())
+ .collect();
+ assert_eq!(actual, expected);
let levels = calculate_array_levels(&list, &list_field).unwrap();
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index 52f55a91b..ecb59e93e 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -21,11 +21,8 @@ use std::collections::VecDeque;
use std::io::Write;
use std::sync::Arc;
-use arrow::array as arrow_array;
-use arrow::array::ArrayRef;
-use arrow::datatypes::{DataType as ArrowDataType, IntervalUnit, SchemaRef};
-use arrow::record_batch::RecordBatch;
-use arrow_array::Array;
+use arrow_array::{Array, ArrayRef, RecordBatch};
+use arrow_schema::{DataType as ArrowDataType, IntervalUnit, SchemaRef};
use super::schema::{
add_encoded_arrow_schema_to_metadata, arrow_to_parquet_schema,
@@ -54,8 +51,8 @@ mod levels;
/// ```
/// # use std::sync::Arc;
/// # use bytes::Bytes;
-/// # use arrow::array::{ArrayRef, Int64Array};
-/// # use arrow::record_batch::RecordBatch;
+/// # use arrow_array::{ArrayRef, Int64Array};
+/// # use arrow_array::RecordBatch;
/// # use parquet::arrow::{ArrowReader, ArrowWriter, ParquetFileArrowReader};
/// let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
/// let to_write = RecordBatch::try_from_iter([("col", col)]).unwrap();
@@ -376,8 +373,8 @@ fn write_leaf(
match column.data_type() {
ArrowDataType::Date64 => {
// If the column is a Date64, we cast it to a Date32, and then interpret that as Int32
- let array = arrow::compute::cast(column, &ArrowDataType::Date32)?;
- let array = arrow::compute::cast(&array, &ArrowDataType::Int32)?;
+ let array = arrow_cast::cast(column, &ArrowDataType::Date32)?;
+ let array = arrow_cast::cast(&array, &ArrowDataType::Int32)?;
let array = array
.as_any()
@@ -394,7 +391,7 @@ fn write_leaf(
write_primitive(typed, &array[offset..offset + data.len()], levels)?
}
_ => {
- let array = arrow::compute::cast(column, &ArrowDataType::Int32)?;
+ let array = arrow_cast::cast(column, &ArrowDataType::Int32)?;
let array = array
.as_any()
.downcast_ref::<arrow_array::Int32Array>()
@@ -432,7 +429,7 @@ fn write_leaf(
write_primitive(typed, &array[offset..offset + data.len()], levels)?
}
_ => {
- let array = arrow::compute::cast(column, &ArrowDataType::Int64)?;
+ let array = arrow_cast::cast(column, &ArrowDataType::Int64)?;
let array = array
.as_any()
.downcast_ref::<arrow_array::Int64Array>()
@@ -618,9 +615,9 @@ mod tests {
use arrow::datatypes::ToByteSlice;
use arrow::datatypes::{DataType, Field, Schema, UInt32Type, UInt8Type};
use arrow::error::Result as ArrowResult;
- use arrow::record_batch::RecordBatch;
use arrow::util::pretty::pretty_format_batches;
use arrow::{array::*, buffer::Buffer};
+ use arrow_array::RecordBatch;
use crate::basic::Encoding;
use crate::file::metadata::ParquetMetaData;
diff --git a/parquet/src/arrow/async_reader.rs b/parquet/src/arrow/async_reader.rs
index b6b5d7ff7..d52fa0406 100644
--- a/parquet/src/arrow/async_reader.rs
+++ b/parquet/src/arrow/async_reader.rs
@@ -22,7 +22,7 @@
//! # #[tokio::main(flavor="current_thread")]
//! # async fn main() {
//! #
-//! use arrow::record_batch::RecordBatch;
+//! use arrow_array::RecordBatch;
//! use arrow::util::pretty::pretty_format_batches;
//! use futures::TryStreamExt;
//! use tokio::fs::File;
@@ -93,8 +93,8 @@ use thrift::protocol::TCompactInputProtocol;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
-use arrow::datatypes::SchemaRef;
-use arrow::record_batch::RecordBatch;
+use arrow_array::RecordBatch;
+use arrow_schema::SchemaRef;
use crate::arrow::array_reader::{build_array_reader, RowGroupCollection};
use crate::arrow::arrow_reader::{
@@ -797,8 +797,8 @@ mod tests {
use crate::arrow::ArrowWriter;
use crate::file::footer::parse_metadata;
use crate::file::page_index::index_reader;
- use arrow::array::{Array, ArrayRef, Int32Array, StringArray};
use arrow::error::Result as ArrowResult;
+ use arrow_array::{Array, ArrayRef, Int32Array, StringArray};
use futures::TryStreamExt;
use rand::{thread_rng, Rng};
use std::sync::Mutex;
diff --git a/parquet/src/arrow/buffer/bit_util.rs b/parquet/src/arrow/buffer/bit_util.rs
index 04704237c..34a0a4b83 100644
--- a/parquet/src/arrow/buffer/bit_util.rs
+++ b/parquet/src/arrow/buffer/bit_util.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::util::bit_chunk_iterator::UnalignedBitChunk;
+use arrow_buffer::bit_chunk_iterator::UnalignedBitChunk;
use std::ops::Range;
/// Counts the number of set bits in the provided range
@@ -65,7 +65,7 @@ pub fn sign_extend_be<const N: usize>(b: &[u8]) -> [u8; N] {
#[cfg(test)]
mod tests {
use super::*;
- use arrow::array::BooleanBufferBuilder;
+ use arrow_array::builder::BooleanBufferBuilder;
use rand::prelude::*;
#[test]
diff --git a/parquet/src/arrow/buffer/dictionary_buffer.rs b/parquet/src/arrow/buffer/dictionary_buffer.rs
index ae9e3590d..23ebea57b 100644
--- a/parquet/src/arrow/buffer/dictionary_buffer.rs
+++ b/parquet/src/arrow/buffer/dictionary_buffer.rs
@@ -21,9 +21,10 @@ use crate::arrow::record_reader::buffer::{
};
use crate::column::reader::decoder::ValuesBufferSlice;
use crate::errors::{ParquetError, Result};
-use arrow::array::{make_array, Array, ArrayDataBuilder, ArrayRef, OffsetSizeTrait};
-use arrow::buffer::Buffer;
-use arrow::datatypes::{ArrowNativeType, DataType as ArrowType};
+use arrow_array::{make_array, Array, ArrayRef, OffsetSizeTrait};
+use arrow_buffer::{ArrowNativeType, Buffer};
+use arrow_data::ArrayDataBuilder;
+use arrow_schema::DataType as ArrowType;
use std::sync::Arc;
/// An array of variable length byte arrays that are potentially dictionary encoded
@@ -179,7 +180,7 @@ impl<K: ScalarValue + ArrowNativeType + Ord, V: ScalarValue + OffsetSizeTrait>
};
// This will compute a new dictionary
- let array = arrow::compute::cast(
+ let array = arrow_cast::cast(
&values.into_array(null_buffer, value_type),
data_type,
)
@@ -252,8 +253,8 @@ impl<K: ScalarValue, V: ScalarValue + OffsetSizeTrait> BufferQueue
#[cfg(test)]
mod tests {
use super::*;
- use arrow::array::{Array, StringArray};
use arrow::compute::cast;
+ use arrow_array::{Array, StringArray};
#[test]
fn test_dictionary_buffer() {
diff --git a/parquet/src/arrow/buffer/offset_buffer.rs b/parquet/src/arrow/buffer/offset_buffer.rs
index 48eb70137..df96996e3 100644
--- a/parquet/src/arrow/buffer/offset_buffer.rs
+++ b/parquet/src/arrow/buffer/offset_buffer.rs
@@ -21,9 +21,10 @@ use crate::arrow::record_reader::buffer::{
};
use crate::column::reader::decoder::ValuesBufferSlice;
use crate::errors::{ParquetError, Result};
-use arrow::array::{make_array, ArrayDataBuilder, ArrayRef, OffsetSizeTrait};
-use arrow::buffer::Buffer;
-use arrow::datatypes::{ArrowNativeType, DataType as ArrowType};
+use arrow_array::{make_array, ArrayRef, OffsetSizeTrait};
+use arrow_buffer::{ArrowNativeType, Buffer};
+use arrow_data::ArrayDataBuilder;
+use arrow_schema::DataType as ArrowType;
/// A buffer of variable-sized byte arrays that can be converted into
/// a corresponding [`ArrayRef`]
@@ -238,7 +239,7 @@ impl<I: ScalarValue> ValuesBufferSlice for OffsetBuffer<I> {
#[cfg(test)]
mod tests {
use super::*;
- use arrow::array::{Array, LargeStringArray, StringArray};
+ use arrow_array::{Array, LargeStringArray, StringArray};
#[test]
fn test_offset_buffer_empty() {
diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index c5fe0fa2a..97d0c25e2 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -16,8 +16,8 @@
// under the License.
//! Provides API for reading/writing Arrow
-//! [RecordBatch](arrow::record_batch::RecordBatch)es and
-//! [Array](arrow::array::Array)s to/from Parquet Files.
+//! [RecordBatch](arrow_array::RecordBatch)es and
+//! [Array](arrow_array::Array)s to/from Parquet Files.
//!
//! [Apache Arrow](http://arrow.apache.org/) is a cross-language development platform for
//! in-memory data.
@@ -25,8 +25,8 @@
//!# Example of writing Arrow record batch to Parquet file
//!
//!```rust
-//! use arrow::array::{Int32Array, ArrayRef};
-//! use arrow::record_batch::RecordBatch;
+//! use arrow_array::{Int32Array, ArrayRef};
+//! use arrow_array::RecordBatch;
//! use parquet::arrow::arrow_writer::ArrowWriter;
//! use parquet::file::properties::WriterProperties;
//! use std::fs::File;
@@ -70,9 +70,9 @@
//! use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
//!
//! # use std::sync::Arc;
-//! # use arrow::array::Int32Array;
+//! # use arrow_array::Int32Array;
//! # use arrow::datatypes::{DataType, Field, Schema};
-//! # use arrow::record_batch::RecordBatch;
+//! # use arrow_array::RecordBatch;
//! # use parquet::arrow::arrow_writer::ArrowWriter;
//! #
//! # let ids = Int32Array::from(vec![1, 2, 3, 4]);
diff --git a/parquet/src/arrow/record_reader/buffer.rs b/parquet/src/arrow/record_reader/buffer.rs
index 64ea38f80..404989493 100644
--- a/parquet/src/arrow/record_reader/buffer.rs
+++ b/parquet/src/arrow/record_reader/buffer.rs
@@ -19,8 +19,7 @@ use std::marker::PhantomData;
use crate::arrow::buffer::bit_util::iter_set_bits_rev;
use crate::data_type::Int96;
-use arrow::buffer::{Buffer, MutableBuffer};
-use arrow::datatypes::ArrowNativeType;
+use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
/// A buffer that supports writing new data to the end, and removing data from the front
///
diff --git a/parquet/src/arrow/record_reader/definition_levels.rs b/parquet/src/arrow/record_reader/definition_levels.rs
index 2d65db77f..84b7ab94c 100644
--- a/parquet/src/arrow/record_reader/definition_levels.rs
+++ b/parquet/src/arrow/record_reader/definition_levels.rs
@@ -17,10 +17,10 @@
use std::ops::Range;
-use arrow::array::BooleanBufferBuilder;
-use arrow::bitmap::Bitmap;
-use arrow::buffer::Buffer;
-use arrow::util::bit_chunk_iterator::UnalignedBitChunk;
+use arrow_array::builder::BooleanBufferBuilder;
+use arrow_buffer::bit_chunk_iterator::UnalignedBitChunk;
+use arrow_buffer::Buffer;
+use arrow_data::Bitmap;
use crate::arrow::buffer::bit_util::count_set_bits;
use crate::arrow::record_reader::buffer::BufferQueue;
diff --git a/parquet/src/arrow/record_reader/mod.rs b/parquet/src/arrow/record_reader/mod.rs
index b7318af9e..ef17b8d0e 100644
--- a/parquet/src/arrow/record_reader/mod.rs
+++ b/parquet/src/arrow/record_reader/mod.rs
@@ -17,8 +17,8 @@
use std::cmp::{max, min};
-use arrow::bitmap::Bitmap;
-use arrow::buffer::Buffer;
+use arrow_buffer::Buffer;
+use arrow_data::Bitmap;
use crate::arrow::record_reader::{
buffer::{BufferQueue, ScalarBuffer, ValuesBuffer},
@@ -409,9 +409,9 @@ fn packed_null_mask(descr: &ColumnDescPtr) -> bool {
mod tests {
use std::sync::Arc;
- use arrow::array::{Int16BufferBuilder, Int32BufferBuilder};
use arrow::bitmap::Bitmap;
use arrow::buffer::Buffer;
+ use arrow_array::builder::{Int16BufferBuilder, Int32BufferBuilder};
use crate::basic::Encoding;
use crate::data_type::Int32Type;
diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema.rs
index 7803385e7..395c4aac1 100644
--- a/parquet/src/arrow/schema.rs
+++ b/parquet/src/arrow/schema.rs
@@ -26,8 +26,8 @@
use std::collections::HashMap;
use std::sync::Arc;
-use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
-use arrow::ipc::writer;
+use arrow_schema::{DataType, Field, Schema, TimeUnit};
+use arrow_ipc::writer;
use crate::basic::{
ConvertedType, LogicalType, Repetition, TimeUnit as ParquetTimeUnit,
@@ -108,10 +108,10 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Result<Schema> {
} else {
bytes.as_slice()
};
- match arrow::ipc::root_as_message(slice) {
+ match arrow_ipc::root_as_message(slice) {
Ok(message) => message
.header_as_schema()
- .map(arrow::ipc::convert::fb_to_schema)
+ .map(arrow_ipc::convert::fb_to_schema)
.ok_or_else(|| arrow_err!("the message is not Arrow Schema")),
Err(err) => {
// The flatbuffers implementation returns an error on verification error.
@@ -137,7 +137,7 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Result<Schema> {
/// Encodes the Arrow schema into the IPC format, and base64 encodes it
fn encode_arrow_schema(schema: &Schema) -> String {
let options = writer::IpcWriteOptions::default();
- let data_gen = arrow::ipc::writer::IpcDataGenerator::default();
+ let data_gen = writer::IpcDataGenerator::default();
let mut serialized_schema = data_gen.schema_to_bytes(schema, &options);
// manually prepending the length to the schema as arrow uses the legacy IPC format
diff --git a/parquet/src/arrow/schema/complex.rs b/parquet/src/arrow/schema/complex.rs
index d63ab5606..2334a5601 100644
--- a/parquet/src/arrow/schema/complex.rs
+++ b/parquet/src/arrow/schema/complex.rs
@@ -21,7 +21,7 @@ use crate::basic::{ConvertedType, Repetition};
use crate::errors::ParquetError;
use crate::errors::Result;
use crate::schema::types::{SchemaDescriptor, Type, TypePtr};
-use arrow::datatypes::{DataType, Field, Schema};
+use arrow_schema::{DataType, Field, Schema};
fn get_repetition(t: &Type) -> Repetition {
let info = t.get_basic_info();
diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs
index 87edd75b0..e5bab9ac9 100644
--- a/parquet/src/arrow/schema/primitive.rs
+++ b/parquet/src/arrow/schema/primitive.rs
@@ -20,7 +20,7 @@ use crate::basic::{
};
use crate::errors::{ParquetError, Result};
use crate::schema::types::{BasicTypeInfo, Type};
-use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
+use arrow_schema::{DataType, IntervalUnit, TimeUnit};
/// Converts [`Type`] to [`DataType`] with an optional `arrow_type_hint`
/// provided by the arrow schema
diff --git a/parquet/src/bin/parquet-fromcsv.rs b/parquet/src/bin/parquet-fromcsv.rs
index 8c62241e3..5fdece7cc 100644
--- a/parquet/src/bin/parquet-fromcsv.rs
+++ b/parquet/src/bin/parquet-fromcsv.rs
@@ -71,7 +71,8 @@ use std::{
sync::Arc,
};
-use arrow::{csv::ReaderBuilder, datatypes::Schema, error::ArrowError};
+use arrow_csv::ReaderBuilder;
+use arrow_schema::{ArrowError, Schema};
use clap::{Parser, ValueEnum};
use parquet::{
arrow::{parquet_to_arrow_schema, ArrowWriter},
diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs
index 09254999b..f63b1e60a 100644
--- a/parquet/src/column/reader.rs
+++ b/parquet/src/column/reader.rs
@@ -515,6 +515,7 @@ where
/// If the current page is fully decoded, this will NOT load the next page
/// into the buffer
#[inline]
+ #[cfg(feature = "arrow")]
pub(crate) fn peek_next(&mut self) -> Result<bool> {
if self.num_buffered_values == 0
|| self.num_buffered_values == self.num_decoded_values
diff --git a/parquet/src/column/writer/encoder.rs b/parquet/src/column/writer/encoder.rs
index 9227c4ba1..22cc71f6c 100644
--- a/parquet/src/column/writer/encoder.rs
+++ b/parquet/src/column/writer/encoder.rs
@@ -34,10 +34,10 @@ pub trait ColumnValues {
fn len(&self) -> usize;
}
-#[cfg(any(feature = "arrow", test))]
-impl<T: arrow::array::Array> ColumnValues for T {
+#[cfg(feature = "arrow")]
+impl<T: arrow_array::Array> ColumnValues for T {
fn len(&self) -> usize {
- arrow::array::Array::len(self)
+ arrow_array::Array::len(self)
}
}
diff --git a/parquet/src/compression.rs b/parquet/src/compression.rs
index bba14f94e..4ee321609 100644
--- a/parquet/src/compression.rs
+++ b/parquet/src/compression.rs
@@ -126,7 +126,7 @@ impl CodecOptionsBuilder {
/// This returns `None` if the codec type is `UNCOMPRESSED`.
pub fn create_codec(
codec: CodecType,
- options: &CodecOptions,
+ _options: &CodecOptions,
) -> Result<Option<Box<dyn Codec>>> {
match codec {
#[cfg(any(feature = "brotli", test))]
@@ -137,7 +137,7 @@ pub fn create_codec(
CodecType::SNAPPY => Ok(Some(Box::new(SnappyCodec::new()))),
#[cfg(any(feature = "lz4", test))]
CodecType::LZ4 => Ok(Some(Box::new(LZ4HadoopCodec::new(
- options.backward_compatible_lz4,
+ _options.backward_compatible_lz4,
)))),
#[cfg(any(feature = "zstd", test))]
CodecType::ZSTD => Ok(Some(Box::new(ZSTDCodec::new()))),
diff --git a/parquet/src/errors.rs b/parquet/src/errors.rs
index c4f5faaaa..cbbd24053 100644
--- a/parquet/src/errors.rs
+++ b/parquet/src/errors.rs
@@ -19,8 +19,8 @@
use std::{cell, io, result, str};
-#[cfg(any(feature = "arrow", test))]
-use arrow::error::ArrowError;
+#[cfg(feature = "arrow")]
+use arrow_schema::ArrowError;
#[derive(Debug, PartialEq, Clone, Eq)]
pub enum ParquetError {
@@ -34,7 +34,7 @@ pub enum ParquetError {
/// Returned when IO related failures occur, e.g. when there are not enough bytes to
/// decode.
EOF(String),
- #[cfg(any(feature = "arrow", test))]
+ #[cfg(feature = "arrow")]
/// Arrow error.
/// Returned when reading into arrow or writing from arrow.
ArrowError(String),
@@ -49,7 +49,7 @@ impl std::fmt::Display for ParquetError {
}
ParquetError::NYI(ref message) => write!(fmt, "NYI: {}", message),
ParquetError::EOF(ref message) => write!(fmt, "EOF: {}", message),
- #[cfg(any(feature = "arrow", test))]
+ #[cfg(feature = "arrow")]
ParquetError::ArrowError(ref message) => write!(fmt, "Arrow: {}", message),
ParquetError::IndexOutOfBound(ref index, ref bound) => {
write!(fmt, "Index {} out of bound: {}", index, bound)
@@ -95,7 +95,7 @@ impl From<str::Utf8Error> for ParquetError {
}
}
-#[cfg(any(feature = "arrow", test))]
+#[cfg(feature = "arrow")]
impl From<ArrowError> for ParquetError {
fn from(e: ArrowError) -> ParquetError {
ParquetError::ArrowError(format!("underlying Arrow error: {}", e))
@@ -103,7 +103,7 @@ impl From<ArrowError> for ParquetError {
}
/// A specialized `Result` for Parquet errors.
-pub type Result<T> = result::Result<T, ParquetError>;
+pub type Result<T, E = ParquetError> = result::Result<T, E>;
// ----------------------------------------------------------------------
// Conversion from `ParquetError` to other types of `Error`s
@@ -135,7 +135,7 @@ macro_rules! eof_err {
($fmt:expr, $($args:expr),*) => (ParquetError::EOF(format!($fmt, $($args),*)));
}
-#[cfg(any(feature = "arrow", test))]
+#[cfg(feature = "arrow")]
macro_rules! arrow_err {
($fmt:expr) => (ParquetError::ArrowError($fmt.to_owned()));
($fmt:expr, $($args:expr),*) => (ParquetError::ArrowError(format!($fmt, $($args),*)));
@@ -147,7 +147,7 @@ macro_rules! arrow_err {
// ----------------------------------------------------------------------
// Convert parquet error into other errors
-#[cfg(any(feature = "arrow", test))]
+#[cfg(feature = "arrow")]
impl From<ParquetError> for ArrowError {
fn from(p: ParquetError) -> Self {
Self::ParquetError(format!("{}", p))
diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs
index 2b3c7d139..a400d4dab 100644
--- a/parquet/src/file/serialized_reader.rs
+++ b/parquet/src/file/serialized_reader.rs
@@ -292,6 +292,7 @@ impl<R: 'static + ChunkReader> SerializedFileReader<R> {
}
}
+ #[cfg(feature = "arrow")]
pub(crate) fn metadata_ref(&self) -> &Arc<ParquetMetaData> {
&self.metadata
}
diff --git a/parquet/src/lib.rs b/parquet/src/lib.rs
index b34d9aa8a..07cddfc3f 100644
--- a/parquet/src/lib.rs
+++ b/parquet/src/lib.rs
@@ -79,7 +79,7 @@ pub use self::encodings::{decoding, encoding};
pub use self::util::memory;
experimental!(#[macro_use] mod util);
-#[cfg(any(feature = "arrow", test))]
+#[cfg(feature = "arrow")]
pub mod arrow;
pub mod column;
experimental!(mod compression);
diff --git a/parquet/src/util/interner.rs b/parquet/src/util/interner.rs
index e638237e0..a59ab8e7a 100644
--- a/parquet/src/util/interner.rs
+++ b/parquet/src/util/interner.rs
@@ -88,6 +88,7 @@ impl<S: Storage> Interner<S> {
}
/// Unwraps the inner storage
+ #[cfg(feature = "arrow")]
pub fn into_inner(self) -> S {
self.storage
}
diff --git a/parquet_derive/Cargo.toml b/parquet_derive/Cargo.toml
index cf068d2f4..a0b2b6ea1 100644
--- a/parquet_derive/Cargo.toml
+++ b/parquet_derive/Cargo.toml
@@ -23,7 +23,7 @@ description = "Derive macros for the Rust implementation of Apache Parquet"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
-keywords = [ "parquet" ]
+keywords = ["parquet"]
readme = "README.md"
edition = "2021"
rust-version = "1.62"
@@ -34,5 +34,5 @@ proc-macro = true
[dependencies]
proc-macro2 = { version = "1.0", default-features = false }
quote = { version = "1.0", default-features = false }
-syn = { version = "1.0", default-features = false }
-parquet = { path = "../parquet", version = "26.0.0" }
+syn = { version = "1.0", features = ["extra-traits"] }
+parquet = { path = "../parquet", version = "26.0.0", default-features = false }