You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/02/05 15:37:43 UTC

[arrow-datafusion] branch datafusion-common created (now 74adf5f)

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a change to branch datafusion-common
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git.


      at 74adf5f  split datafusion-common module

This branch includes the following new commits:

     new 74adf5f  split datafusion-common module

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  Revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[arrow-datafusion] 01/01: split datafusion-common module

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch datafusion-common
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 74adf5fcccfbcf53042fca8ede9e224eccb5fae4
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Sat Feb 5 23:37:28 2022 +0800

    split datafusion-common module
---
 Cargo.toml                                     |   1 +
 Cargo.toml => datafusion-common/Cargo.toml     |  40 ++++--
 {datafusion => datafusion-common}/src/error.rs |   1 -
 datafusion-common/src/lib.rs                   |   3 +
 datafusion/Cargo.toml                          |   1 +
 datafusion/src/error.rs                        | 182 +------------------------
 6 files changed, 31 insertions(+), 197 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index ea1acc0..81f6bb5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@
 [workspace]
 members = [
     "datafusion",
+    "datafusion-common",
     "datafusion-cli",
     "datafusion-examples",
     "benchmarks",
diff --git a/Cargo.toml b/datafusion-common/Cargo.toml
similarity index 50%
copy from Cargo.toml
copy to datafusion-common/Cargo.toml
index ea1acc0..18b3611 100644
--- a/Cargo.toml
+++ b/datafusion-common/Cargo.toml
@@ -15,19 +15,29 @@
 # specific language governing permissions and limitations
 # under the License.
 
-[workspace]
-members = [
-    "datafusion",
-    "datafusion-cli",
-    "datafusion-examples",
-    "benchmarks",
-    "ballista/rust/client",
-    "ballista/rust/core",
-    "ballista/rust/executor",
-    "ballista/rust/scheduler",
-    "ballista-examples",
-]
+[package]
+name = "datafusion-common"
+description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model"
+version = "6.0.0"
+homepage = "https://github.com/apache/arrow-datafusion"
+repository = "https://github.com/apache/arrow-datafusion"
+readme = "../README.md"
+authors = ["Apache Arrow <de...@arrow.apache.org>"]
+license = "Apache-2.0"
+keywords = [ "arrow", "query", "sql" ]
+publish = false
+edition = "2021"
+rust-version = "1.58"
 
-[profile.release]
-lto = true
-codegen-units = 1
+[lib]
+name = "datafusion_common"
+path = "src/lib.rs"
+
+[features]
+avro = ["avro-rs"]
+
+[dependencies]
+arrow = { version = "8.0.0", features = ["prettyprint"] }
+parquet = { version = "8.0.0", features = ["arrow"] }
+avro-rs = { version = "0.13", features = ["snappy"], optional = true }
+sqlparser = "0.13"
diff --git a/datafusion/src/error.rs b/datafusion-common/src/error.rs
similarity index 99%
copy from datafusion/src/error.rs
copy to datafusion-common/src/error.rs
index 248f243..93978db 100644
--- a/datafusion/src/error.rs
+++ b/datafusion-common/src/error.rs
@@ -36,7 +36,6 @@ pub type GenericError = Box<dyn error::Error + Send + Sync>;
 
 /// DataFusion error
 #[derive(Debug)]
-#[allow(missing_docs)]
 pub enum DataFusionError {
     /// Error returned by arrow.
     ArrowError(ArrowError),
diff --git a/datafusion-common/src/lib.rs b/datafusion-common/src/lib.rs
new file mode 100644
index 0000000..82da89a
--- /dev/null
+++ b/datafusion-common/src/lib.rs
@@ -0,0 +1,3 @@
+mod error;
+
+pub use error::{DataFusionError, Result};
diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml
index 54247cb..862e82d 100644
--- a/datafusion/Cargo.toml
+++ b/datafusion/Cargo.toml
@@ -50,6 +50,7 @@ force_hash_collisions = []
 avro = ["avro-rs", "num-traits"]
 
 [dependencies]
+datafusion-common = { path = "../datafusion-common" }
 ahash = { version = "0.7", default-features = false }
 hashbrown = { version = "0.12", features = ["raw"] }
 arrow = { version = "8.0.0", features = ["prettyprint"] }
diff --git a/datafusion/src/error.rs b/datafusion/src/error.rs
index 248f243..c2c80b4 100644
--- a/datafusion/src/error.rs
+++ b/datafusion/src/error.rs
@@ -16,184 +16,4 @@
 // under the License.
 
 //! DataFusion error types
-
-use std::error;
-use std::fmt::{Display, Formatter};
-use std::io;
-use std::result;
-
-use arrow::error::ArrowError;
-#[cfg(feature = "avro")]
-use avro_rs::Error as AvroError;
-use parquet::errors::ParquetError;
-use sqlparser::parser::ParserError;
-
-/// Result type for operations that could result in an [DataFusionError]
-pub type Result<T> = result::Result<T, DataFusionError>;
-
-/// Error type for generic operations that could result in DataFusionError::External
-pub type GenericError = Box<dyn error::Error + Send + Sync>;
-
-/// DataFusion error
-#[derive(Debug)]
-#[allow(missing_docs)]
-pub enum DataFusionError {
-    /// Error returned by arrow.
-    ArrowError(ArrowError),
-    /// Wraps an error from the Parquet crate
-    ParquetError(ParquetError),
-    /// Wraps an error from the Avro crate
-    #[cfg(feature = "avro")]
-    AvroError(AvroError),
-    /// Error associated to I/O operations and associated traits.
-    IoError(io::Error),
-    /// Error returned when SQL is syntactically incorrect.
-    SQL(ParserError),
-    /// Error returned on a branch that we know it is possible
-    /// but to which we still have no implementation for.
-    /// Often, these errors are tracked in our issue tracker.
-    NotImplemented(String),
-    /// Error returned as a consequence of an error in DataFusion.
-    /// This error should not happen in normal usage of DataFusion.
-    // DataFusions has internal invariants that we are unable to ask the compiler to check for us.
-    // This error is raised when one of those invariants is not verified during execution.
-    Internal(String),
-    /// This error happens whenever a plan is not valid. Examples include
-    /// impossible casts, schema inference not possible and non-unique column names.
-    Plan(String),
-    /// Error returned during execution of the query.
-    /// Examples include files not found, errors in parsing certain types.
-    Execution(String),
-    /// This error is thrown when a consumer cannot acquire memory from the Memory Manager
-    /// we can just cancel the execution of the partition.
-    ResourcesExhausted(String),
-    /// Errors originating from outside DataFusion's core codebase.
-    /// For example, a custom S3Error from the crate datafusion-objectstore-s3
-    External(GenericError),
-}
-
-impl From<io::Error> for DataFusionError {
-    fn from(e: io::Error) -> Self {
-        DataFusionError::IoError(e)
-    }
-}
-
-impl From<ArrowError> for DataFusionError {
-    fn from(e: ArrowError) -> Self {
-        DataFusionError::ArrowError(e)
-    }
-}
-
-impl From<DataFusionError> for ArrowError {
-    fn from(e: DataFusionError) -> Self {
-        match e {
-            DataFusionError::ArrowError(e) => e,
-            DataFusionError::External(e) => ArrowError::ExternalError(e),
-            other => ArrowError::ExternalError(Box::new(other)),
-        }
-    }
-}
-
-impl From<ParquetError> for DataFusionError {
-    fn from(e: ParquetError) -> Self {
-        DataFusionError::ParquetError(e)
-    }
-}
-
-#[cfg(feature = "avro")]
-impl From<AvroError> for DataFusionError {
-    fn from(e: AvroError) -> Self {
-        DataFusionError::AvroError(e)
-    }
-}
-
-impl From<ParserError> for DataFusionError {
-    fn from(e: ParserError) -> Self {
-        DataFusionError::SQL(e)
-    }
-}
-
-impl From<GenericError> for DataFusionError {
-    fn from(err: GenericError) -> Self {
-        DataFusionError::External(err)
-    }
-}
-
-impl Display for DataFusionError {
-    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
-        match *self {
-            DataFusionError::ArrowError(ref desc) => write!(f, "Arrow error: {}", desc),
-            DataFusionError::ParquetError(ref desc) => {
-                write!(f, "Parquet error: {}", desc)
-            }
-            #[cfg(feature = "avro")]
-            DataFusionError::AvroError(ref desc) => {
-                write!(f, "Avro error: {}", desc)
-            }
-            DataFusionError::IoError(ref desc) => write!(f, "IO error: {}", desc),
-            DataFusionError::SQL(ref desc) => {
-                write!(f, "SQL error: {:?}", desc)
-            }
-            DataFusionError::NotImplemented(ref desc) => {
-                write!(f, "This feature is not implemented: {}", desc)
-            }
-            DataFusionError::Internal(ref desc) => {
-                write!(f, "Internal error: {}. This was likely caused by a bug in DataFusion's \
-                    code and we would welcome that you file an bug report in our issue tracker", desc)
-            }
-            DataFusionError::Plan(ref desc) => {
-                write!(f, "Error during planning: {}", desc)
-            }
-            DataFusionError::Execution(ref desc) => {
-                write!(f, "Execution error: {}", desc)
-            }
-            DataFusionError::ResourcesExhausted(ref desc) => {
-                write!(f, "Resources exhausted: {}", desc)
-            }
-            DataFusionError::External(ref desc) => {
-                write!(f, "External error: {}", desc)
-            }
-        }
-    }
-}
-
-impl error::Error for DataFusionError {}
-
-#[cfg(test)]
-mod test {
-    use crate::error::DataFusionError;
-    use arrow::error::ArrowError;
-
-    #[test]
-    fn arrow_error_to_datafusion() {
-        let res = return_arrow_error().unwrap_err();
-        assert_eq!(
-            res.to_string(),
-            "External error: Error during planning: foo"
-        );
-    }
-
-    #[test]
-    fn datafusion_error_to_arrow() {
-        let res = return_datafusion_error().unwrap_err();
-        assert_eq!(res.to_string(), "Arrow error: Schema error: bar");
-    }
-
-    /// Model what happens when implementing SendableRecrordBatchStream:
-    /// DataFusion code needs to return an ArrowError
-    #[allow(clippy::try_err)]
-    fn return_arrow_error() -> arrow::error::Result<()> {
-        // Expect the '?' to work
-        let _foo = Err(DataFusionError::Plan("foo".to_string()))?;
-        Ok(())
-    }
-
-    /// Model what happens when using arrow kernels in DataFusion
-    /// code: need to turn an ArrowError into a DataFusionError
-    #[allow(clippy::try_err)]
-    fn return_datafusion_error() -> crate::error::Result<()> {
-        // Expect the '?' to work
-        let _bar = Err(ArrowError::SchemaError("bar".to_string()))?;
-        Ok(())
-    }
-}
+pub use datafusion_common::{DataFusionError, Result};