You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/02/05 15:37:44 UTC

[arrow-datafusion] 01/01: split datafusion-common module

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch datafusion-common
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 74adf5fcccfbcf53042fca8ede9e224eccb5fae4
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Sat Feb 5 23:37:28 2022 +0800

    split datafusion-common module
---
 Cargo.toml                                     |   1 +
 Cargo.toml => datafusion-common/Cargo.toml     |  40 ++++--
 {datafusion => datafusion-common}/src/error.rs |   1 -
 datafusion-common/src/lib.rs                   |   3 +
 datafusion/Cargo.toml                          |   1 +
 datafusion/src/error.rs                        | 182 +------------------------
 6 files changed, 31 insertions(+), 197 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index ea1acc0..81f6bb5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@
 [workspace]
 members = [
     "datafusion",
+    "datafusion-common",
     "datafusion-cli",
     "datafusion-examples",
     "benchmarks",
diff --git a/Cargo.toml b/datafusion-common/Cargo.toml
similarity index 50%
copy from Cargo.toml
copy to datafusion-common/Cargo.toml
index ea1acc0..18b3611 100644
--- a/Cargo.toml
+++ b/datafusion-common/Cargo.toml
@@ -15,19 +15,29 @@
 # specific language governing permissions and limitations
 # under the License.
 
-[workspace]
-members = [
-    "datafusion",
-    "datafusion-cli",
-    "datafusion-examples",
-    "benchmarks",
-    "ballista/rust/client",
-    "ballista/rust/core",
-    "ballista/rust/executor",
-    "ballista/rust/scheduler",
-    "ballista-examples",
-]
+[package]
+name = "datafusion-common"
+description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model"
+version = "6.0.0"
+homepage = "https://github.com/apache/arrow-datafusion"
+repository = "https://github.com/apache/arrow-datafusion"
+readme = "../README.md"
+authors = ["Apache Arrow <de...@arrow.apache.org>"]
+license = "Apache-2.0"
+keywords = [ "arrow", "query", "sql" ]
+publish = false
+edition = "2021"
+rust-version = "1.58"
 
-[profile.release]
-lto = true
-codegen-units = 1
+[lib]
+name = "datafusion_common"
+path = "src/lib.rs"
+
+[features]
+avro = ["avro-rs"]
+
+[dependencies]
+arrow = { version = "8.0.0", features = ["prettyprint"] }
+parquet = { version = "8.0.0", features = ["arrow"] }
+avro-rs = { version = "0.13", features = ["snappy"], optional = true }
+sqlparser = "0.13"
diff --git a/datafusion/src/error.rs b/datafusion-common/src/error.rs
similarity index 99%
copy from datafusion/src/error.rs
copy to datafusion-common/src/error.rs
index 248f243..93978db 100644
--- a/datafusion/src/error.rs
+++ b/datafusion-common/src/error.rs
@@ -36,7 +36,6 @@ pub type GenericError = Box<dyn error::Error + Send + Sync>;
 
 /// DataFusion error
 #[derive(Debug)]
-#[allow(missing_docs)]
 pub enum DataFusionError {
     /// Error returned by arrow.
     ArrowError(ArrowError),
diff --git a/datafusion-common/src/lib.rs b/datafusion-common/src/lib.rs
new file mode 100644
index 0000000..82da89a
--- /dev/null
+++ b/datafusion-common/src/lib.rs
@@ -0,0 +1,3 @@
+mod error;
+
+pub use error::{DataFusionError, Result};
diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml
index 54247cb..862e82d 100644
--- a/datafusion/Cargo.toml
+++ b/datafusion/Cargo.toml
@@ -50,6 +50,7 @@ force_hash_collisions = []
 avro = ["avro-rs", "num-traits"]
 
 [dependencies]
+datafusion-common = { path = "../datafusion-common" }
 ahash = { version = "0.7", default-features = false }
 hashbrown = { version = "0.12", features = ["raw"] }
 arrow = { version = "8.0.0", features = ["prettyprint"] }
diff --git a/datafusion/src/error.rs b/datafusion/src/error.rs
index 248f243..c2c80b4 100644
--- a/datafusion/src/error.rs
+++ b/datafusion/src/error.rs
@@ -16,184 +16,4 @@
 // under the License.
 
 //! DataFusion error types
-
-use std::error;
-use std::fmt::{Display, Formatter};
-use std::io;
-use std::result;
-
-use arrow::error::ArrowError;
-#[cfg(feature = "avro")]
-use avro_rs::Error as AvroError;
-use parquet::errors::ParquetError;
-use sqlparser::parser::ParserError;
-
-/// Result type for operations that could result in an [DataFusionError]
-pub type Result<T> = result::Result<T, DataFusionError>;
-
-/// Error type for generic operations that could result in DataFusionError::External
-pub type GenericError = Box<dyn error::Error + Send + Sync>;
-
-/// DataFusion error
-#[derive(Debug)]
-#[allow(missing_docs)]
-pub enum DataFusionError {
-    /// Error returned by arrow.
-    ArrowError(ArrowError),
-    /// Wraps an error from the Parquet crate
-    ParquetError(ParquetError),
-    /// Wraps an error from the Avro crate
-    #[cfg(feature = "avro")]
-    AvroError(AvroError),
-    /// Error associated to I/O operations and associated traits.
-    IoError(io::Error),
-    /// Error returned when SQL is syntactically incorrect.
-    SQL(ParserError),
-    /// Error returned on a branch that we know it is possible
-    /// but to which we still have no implementation for.
-    /// Often, these errors are tracked in our issue tracker.
-    NotImplemented(String),
-    /// Error returned as a consequence of an error in DataFusion.
-    /// This error should not happen in normal usage of DataFusion.
-    // DataFusions has internal invariants that we are unable to ask the compiler to check for us.
-    // This error is raised when one of those invariants is not verified during execution.
-    Internal(String),
-    /// This error happens whenever a plan is not valid. Examples include
-    /// impossible casts, schema inference not possible and non-unique column names.
-    Plan(String),
-    /// Error returned during execution of the query.
-    /// Examples include files not found, errors in parsing certain types.
-    Execution(String),
-    /// This error is thrown when a consumer cannot acquire memory from the Memory Manager
-    /// we can just cancel the execution of the partition.
-    ResourcesExhausted(String),
-    /// Errors originating from outside DataFusion's core codebase.
-    /// For example, a custom S3Error from the crate datafusion-objectstore-s3
-    External(GenericError),
-}
-
-impl From<io::Error> for DataFusionError {
-    fn from(e: io::Error) -> Self {
-        DataFusionError::IoError(e)
-    }
-}
-
-impl From<ArrowError> for DataFusionError {
-    fn from(e: ArrowError) -> Self {
-        DataFusionError::ArrowError(e)
-    }
-}
-
-impl From<DataFusionError> for ArrowError {
-    fn from(e: DataFusionError) -> Self {
-        match e {
-            DataFusionError::ArrowError(e) => e,
-            DataFusionError::External(e) => ArrowError::ExternalError(e),
-            other => ArrowError::ExternalError(Box::new(other)),
-        }
-    }
-}
-
-impl From<ParquetError> for DataFusionError {
-    fn from(e: ParquetError) -> Self {
-        DataFusionError::ParquetError(e)
-    }
-}
-
-#[cfg(feature = "avro")]
-impl From<AvroError> for DataFusionError {
-    fn from(e: AvroError) -> Self {
-        DataFusionError::AvroError(e)
-    }
-}
-
-impl From<ParserError> for DataFusionError {
-    fn from(e: ParserError) -> Self {
-        DataFusionError::SQL(e)
-    }
-}
-
-impl From<GenericError> for DataFusionError {
-    fn from(err: GenericError) -> Self {
-        DataFusionError::External(err)
-    }
-}
-
-impl Display for DataFusionError {
-    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
-        match *self {
-            DataFusionError::ArrowError(ref desc) => write!(f, "Arrow error: {}", desc),
-            DataFusionError::ParquetError(ref desc) => {
-                write!(f, "Parquet error: {}", desc)
-            }
-            #[cfg(feature = "avro")]
-            DataFusionError::AvroError(ref desc) => {
-                write!(f, "Avro error: {}", desc)
-            }
-            DataFusionError::IoError(ref desc) => write!(f, "IO error: {}", desc),
-            DataFusionError::SQL(ref desc) => {
-                write!(f, "SQL error: {:?}", desc)
-            }
-            DataFusionError::NotImplemented(ref desc) => {
-                write!(f, "This feature is not implemented: {}", desc)
-            }
-            DataFusionError::Internal(ref desc) => {
-                write!(f, "Internal error: {}. This was likely caused by a bug in DataFusion's \
-                    code and we would welcome that you file an bug report in our issue tracker", desc)
-            }
-            DataFusionError::Plan(ref desc) => {
-                write!(f, "Error during planning: {}", desc)
-            }
-            DataFusionError::Execution(ref desc) => {
-                write!(f, "Execution error: {}", desc)
-            }
-            DataFusionError::ResourcesExhausted(ref desc) => {
-                write!(f, "Resources exhausted: {}", desc)
-            }
-            DataFusionError::External(ref desc) => {
-                write!(f, "External error: {}", desc)
-            }
-        }
-    }
-}
-
-impl error::Error for DataFusionError {}
-
-#[cfg(test)]
-mod test {
-    use crate::error::DataFusionError;
-    use arrow::error::ArrowError;
-
-    #[test]
-    fn arrow_error_to_datafusion() {
-        let res = return_arrow_error().unwrap_err();
-        assert_eq!(
-            res.to_string(),
-            "External error: Error during planning: foo"
-        );
-    }
-
-    #[test]
-    fn datafusion_error_to_arrow() {
-        let res = return_datafusion_error().unwrap_err();
-        assert_eq!(res.to_string(), "Arrow error: Schema error: bar");
-    }
-
-    /// Model what happens when implementing SendableRecrordBatchStream:
-    /// DataFusion code needs to return an ArrowError
-    #[allow(clippy::try_err)]
-    fn return_arrow_error() -> arrow::error::Result<()> {
-        // Expect the '?' to work
-        let _foo = Err(DataFusionError::Plan("foo".to_string()))?;
-        Ok(())
-    }
-
-    /// Model what happens when using arrow kernels in DataFusion
-    /// code: need to turn an ArrowError into a DataFusionError
-    #[allow(clippy::try_err)]
-    fn return_datafusion_error() -> crate::error::Result<()> {
-        // Expect the '?' to work
-        let _bar = Err(ArrowError::SchemaError("bar".to_string()))?;
-        Ok(())
-    }
-}
+pub use datafusion_common::{DataFusionError, Result};