You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/02/05 15:37:44 UTC
[arrow-datafusion] 01/01: split datafusion-common module
This is an automated email from the ASF dual-hosted git repository.
jiayuliu pushed a commit to branch datafusion-common
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
commit 74adf5fcccfbcf53042fca8ede9e224eccb5fae4
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Sat Feb 5 23:37:28 2022 +0800
split datafusion-common module
---
Cargo.toml | 1 +
Cargo.toml => datafusion-common/Cargo.toml | 40 ++++--
{datafusion => datafusion-common}/src/error.rs | 1 -
datafusion-common/src/lib.rs | 3 +
datafusion/Cargo.toml | 1 +
datafusion/src/error.rs | 182 +------------------------
6 files changed, 31 insertions(+), 197 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index ea1acc0..81f6bb5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@
[workspace]
members = [
"datafusion",
+ "datafusion-common",
"datafusion-cli",
"datafusion-examples",
"benchmarks",
diff --git a/Cargo.toml b/datafusion-common/Cargo.toml
similarity index 50%
copy from Cargo.toml
copy to datafusion-common/Cargo.toml
index ea1acc0..18b3611 100644
--- a/Cargo.toml
+++ b/datafusion-common/Cargo.toml
@@ -15,19 +15,29 @@
# specific language governing permissions and limitations
# under the License.
-[workspace]
-members = [
- "datafusion",
- "datafusion-cli",
- "datafusion-examples",
- "benchmarks",
- "ballista/rust/client",
- "ballista/rust/core",
- "ballista/rust/executor",
- "ballista/rust/scheduler",
- "ballista-examples",
-]
+[package]
+name = "datafusion-common"
+description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model"
+version = "6.0.0"
+homepage = "https://github.com/apache/arrow-datafusion"
+repository = "https://github.com/apache/arrow-datafusion"
+readme = "../README.md"
+authors = ["Apache Arrow <de...@arrow.apache.org>"]
+license = "Apache-2.0"
+keywords = [ "arrow", "query", "sql" ]
+publish = false
+edition = "2021"
+rust-version = "1.58"
-[profile.release]
-lto = true
-codegen-units = 1
+[lib]
+name = "datafusion_common"
+path = "src/lib.rs"
+
+[features]
+avro = ["avro-rs"]
+
+[dependencies]
+arrow = { version = "8.0.0", features = ["prettyprint"] }
+parquet = { version = "8.0.0", features = ["arrow"] }
+avro-rs = { version = "0.13", features = ["snappy"], optional = true }
+sqlparser = "0.13"
diff --git a/datafusion/src/error.rs b/datafusion-common/src/error.rs
similarity index 99%
copy from datafusion/src/error.rs
copy to datafusion-common/src/error.rs
index 248f243..93978db 100644
--- a/datafusion/src/error.rs
+++ b/datafusion-common/src/error.rs
@@ -36,7 +36,6 @@ pub type GenericError = Box<dyn error::Error + Send + Sync>;
/// DataFusion error
#[derive(Debug)]
-#[allow(missing_docs)]
pub enum DataFusionError {
/// Error returned by arrow.
ArrowError(ArrowError),
diff --git a/datafusion-common/src/lib.rs b/datafusion-common/src/lib.rs
new file mode 100644
index 0000000..82da89a
--- /dev/null
+++ b/datafusion-common/src/lib.rs
@@ -0,0 +1,3 @@
+mod error;
+
+pub use error::{DataFusionError, Result};
diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml
index 54247cb..862e82d 100644
--- a/datafusion/Cargo.toml
+++ b/datafusion/Cargo.toml
@@ -50,6 +50,7 @@ force_hash_collisions = []
avro = ["avro-rs", "num-traits"]
[dependencies]
+datafusion-common = { path = "../datafusion-common" }
ahash = { version = "0.7", default-features = false }
hashbrown = { version = "0.12", features = ["raw"] }
arrow = { version = "8.0.0", features = ["prettyprint"] }
diff --git a/datafusion/src/error.rs b/datafusion/src/error.rs
index 248f243..c2c80b4 100644
--- a/datafusion/src/error.rs
+++ b/datafusion/src/error.rs
@@ -16,184 +16,4 @@
// under the License.
//! DataFusion error types
-
-use std::error;
-use std::fmt::{Display, Formatter};
-use std::io;
-use std::result;
-
-use arrow::error::ArrowError;
-#[cfg(feature = "avro")]
-use avro_rs::Error as AvroError;
-use parquet::errors::ParquetError;
-use sqlparser::parser::ParserError;
-
-/// Result type for operations that could result in a [DataFusionError]
-pub type Result<T> = result::Result<T, DataFusionError>;
-
-/// Error type for generic operations that could result in DataFusionError::External
-pub type GenericError = Box<dyn error::Error + Send + Sync>;
-
-/// DataFusion error
-#[derive(Debug)]
-#[allow(missing_docs)]
-pub enum DataFusionError {
- /// Error returned by arrow.
- ArrowError(ArrowError),
- /// Wraps an error from the Parquet crate
- ParquetError(ParquetError),
- /// Wraps an error from the Avro crate
- #[cfg(feature = "avro")]
- AvroError(AvroError),
- /// Error associated to I/O operations and associated traits.
- IoError(io::Error),
- /// Error returned when SQL is syntactically incorrect.
- SQL(ParserError),
- /// Error returned on a branch that we know is possible
- /// but for which we still have no implementation.
- /// Often, these errors are tracked in our issue tracker.
- NotImplemented(String),
- /// Error returned as a consequence of an error in DataFusion.
- /// This error should not happen in normal usage of DataFusion.
- // DataFusion has internal invariants that we are unable to ask the compiler to check for us.
- // This error is raised when one of those invariants is not verified during execution.
- Internal(String),
- /// This error happens whenever a plan is not valid. Examples include
- /// impossible casts, schema inference not possible and non-unique column names.
- Plan(String),
- /// Error returned during execution of the query.
- /// Examples include files not found, errors in parsing certain types.
- Execution(String),
- /// This error is thrown when a consumer cannot acquire memory from the Memory Manager;
- /// we can just cancel the execution of the partition.
- ResourcesExhausted(String),
- /// Errors originating from outside DataFusion's core codebase.
- /// For example, a custom S3Error from the crate datafusion-objectstore-s3
- External(GenericError),
-}
-
-impl From<io::Error> for DataFusionError {
- fn from(e: io::Error) -> Self {
- DataFusionError::IoError(e)
- }
-}
-
-impl From<ArrowError> for DataFusionError {
- fn from(e: ArrowError) -> Self {
- DataFusionError::ArrowError(e)
- }
-}
-
-impl From<DataFusionError> for ArrowError {
- fn from(e: DataFusionError) -> Self {
- match e {
- DataFusionError::ArrowError(e) => e,
- DataFusionError::External(e) => ArrowError::ExternalError(e),
- other => ArrowError::ExternalError(Box::new(other)),
- }
- }
-}
-
-impl From<ParquetError> for DataFusionError {
- fn from(e: ParquetError) -> Self {
- DataFusionError::ParquetError(e)
- }
-}
-
-#[cfg(feature = "avro")]
-impl From<AvroError> for DataFusionError {
- fn from(e: AvroError) -> Self {
- DataFusionError::AvroError(e)
- }
-}
-
-impl From<ParserError> for DataFusionError {
- fn from(e: ParserError) -> Self {
- DataFusionError::SQL(e)
- }
-}
-
-impl From<GenericError> for DataFusionError {
- fn from(err: GenericError) -> Self {
- DataFusionError::External(err)
- }
-}
-
-impl Display for DataFusionError {
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- match *self {
- DataFusionError::ArrowError(ref desc) => write!(f, "Arrow error: {}", desc),
- DataFusionError::ParquetError(ref desc) => {
- write!(f, "Parquet error: {}", desc)
- }
- #[cfg(feature = "avro")]
- DataFusionError::AvroError(ref desc) => {
- write!(f, "Avro error: {}", desc)
- }
- DataFusionError::IoError(ref desc) => write!(f, "IO error: {}", desc),
- DataFusionError::SQL(ref desc) => {
- write!(f, "SQL error: {:?}", desc)
- }
- DataFusionError::NotImplemented(ref desc) => {
- write!(f, "This feature is not implemented: {}", desc)
- }
- DataFusionError::Internal(ref desc) => {
- write!(f, "Internal error: {}. This was likely caused by a bug in DataFusion's \
- code and we would welcome that you file an bug report in our issue tracker", desc)
- }
- DataFusionError::Plan(ref desc) => {
- write!(f, "Error during planning: {}", desc)
- }
- DataFusionError::Execution(ref desc) => {
- write!(f, "Execution error: {}", desc)
- }
- DataFusionError::ResourcesExhausted(ref desc) => {
- write!(f, "Resources exhausted: {}", desc)
- }
- DataFusionError::External(ref desc) => {
- write!(f, "External error: {}", desc)
- }
- }
- }
-}
-
-impl error::Error for DataFusionError {}
-
-#[cfg(test)]
-mod test {
- use crate::error::DataFusionError;
- use arrow::error::ArrowError;
-
- #[test]
- fn arrow_error_to_datafusion() {
- let res = return_arrow_error().unwrap_err();
- assert_eq!(
- res.to_string(),
- "External error: Error during planning: foo"
- );
- }
-
- #[test]
- fn datafusion_error_to_arrow() {
- let res = return_datafusion_error().unwrap_err();
- assert_eq!(res.to_string(), "Arrow error: Schema error: bar");
- }
-
- /// Model what happens when implementing SendableRecordBatchStream:
- /// DataFusion code needs to return an ArrowError
- #[allow(clippy::try_err)]
- fn return_arrow_error() -> arrow::error::Result<()> {
- // Expect the '?' to work
- let _foo = Err(DataFusionError::Plan("foo".to_string()))?;
- Ok(())
- }
-
- /// Model what happens when using arrow kernels in DataFusion
- /// code: need to turn an ArrowError into a DataFusionError
- #[allow(clippy::try_err)]
- fn return_datafusion_error() -> crate::error::Result<()> {
- // Expect the '?' to work
- let _bar = Err(ArrowError::SchemaError("bar".to_string()))?;
- Ok(())
- }
-}
+pub use datafusion_common::{DataFusionError, Result};