You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ne...@apache.org on 2019/07/30 05:30:00 UTC
[arrow] branch master updated: ARROW-5901: [Rust] Add equals to
json arrays.
This is an automated email from the ASF dual-hosted git repository.
nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new b071f6b ARROW-5901: [Rust] Add equals to json arrays.
b071f6b is described below
commit b071f6b08107831766f9ecda5b46a89011f00a23
Author: Renjie Liu <li...@gmail.com>
AuthorDate: Tue Jul 30 07:29:38 2019 +0200
ARROW-5901: [Rust] Add equals to json arrays.
Checks whether an Arrow array equals a JSON array. This was motivated by my work on integration tests for the Parquet Arrow reader: I use protobuf to generate both Parquet data and JSON data, read the Parquet data into Arrow, and compare it with the JSON data to verify correctness.
Closes #4940 from liurenjie1024/arrow-5901 and squashes the following commits:
c4815efc1 <Renjie Liu> Add tests to improve coverage
cf3490727 <Renjie Liu> Fix comments
d6b7c9cc5 <Renjie Liu> Fix code style problem
b41d05746 <Renjie Liu> Add equals to json arrays.
Authored-by: Renjie Liu <li...@gmail.com>
Signed-off-by: Neville Dipale <ne...@gmail.com>
---
rust/arrow/src/array/array.rs | 29 ++-
rust/arrow/src/array/equal.rs | 497 ++++++++++++++++++++++++++++++++++++++++++
rust/arrow/src/datatypes.rs | 104 ++++++++-
3 files changed, 617 insertions(+), 13 deletions(-)
diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index e4e55d0..f58d03c 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -25,7 +25,9 @@ use std::sync::Arc;
use chrono::prelude::*;
use super::*;
+use crate::array::equal::JsonEqual;
use crate::buffer::{Buffer, MutableBuffer};
+use crate::datatypes::DataType::Struct;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::memory;
@@ -42,7 +44,7 @@ const NANOSECONDS: i64 = 1_000_000_000;
/// Trait for dealing with different types of array at runtime when the type of the
/// array is not known in advance
-pub trait Array: Send + Sync + ArrayEqual {
+pub trait Array: Send + Sync + ArrayEqual + JsonEqual {
/// Returns the array as `Any` so that it can be downcast to a specific implementation
fn as_any(&self) -> &Any;
@@ -726,6 +728,12 @@ impl ListArray {
self.values.data().data_type().clone()
}
+ /// Returns the `i`th element of this list array as a slice of the child
+ /// values array, starting at `value_offset(i)` and spanning
+ /// `value_length(i)` entries.
+ pub fn value(&self, i: usize) -> ArrayRef {
+ let start = self.value_offset(i) as usize;
+ let count = self.value_length(i) as usize;
+ self.values.slice(start, count)
+ }
+
/// Returns the offset for value at index `i`.
///
/// Note this doesn't do any bound checking, for performance reason.
@@ -999,6 +1007,25 @@ impl StructArray {
pub fn columns(&self) -> Vec<&ArrayRef> {
self.boxed_fields.iter().collect()
}
+
+ /// Returns the names of the fields of this struct array, in the order
+ /// they are listed in the struct's data type.
+ pub fn column_names(&self) -> Vec<&str> {
+ if let Struct(fields) = self.data.data_type() {
+ fields.iter().map(|field| field.name().as_str()).collect()
+ } else {
+ unreachable!("Struct array's data type is not struct!")
+ }
+ }
+
+ /// Returns the child array whose field name equals `column_name`, or
+ /// `None` when no field has that name. When several fields share the
+ /// name, the first one in field order is returned.
+ pub fn column_by_name(&self, column_name: &str) -> Option<&ArrayRef> {
+ // Search the field list directly so that no temporary `Vec` of names
+ // has to be allocated for every lookup.
+ match self.data.data_type() {
+ Struct(fields) => fields
+ .iter()
+ .position(|f| f.name().as_str() == column_name)
+ .map(|pos| self.column(pos)),
+ _ => unreachable!("Struct array's data type is not struct!"),
+ }
+ }
}
impl From<ArrayDataRef> for StructArray {
diff --git a/rust/arrow/src/array/equal.rs b/rust/arrow/src/array/equal.rs
index 5f888ab..0f71d54 100644
--- a/rust/arrow/src/array/equal.rs
+++ b/rust/arrow/src/array/equal.rs
@@ -18,6 +18,8 @@
use super::*;
use crate::datatypes::*;
use crate::util::bit_util;
+use serde_json::value::Value::{Null as JNull, Object, String as JString};
+use serde_json::Value;
/// Trait for `Array` equality.
pub trait ArrayEqual {
@@ -418,6 +420,171 @@ fn value_offset_equal<T: Array + ListArrayOps>(this: &T, other: &T) -> bool {
true
}
+/// Trait for comparing an arrow array with a json array
+pub trait JsonEqual {
+ /// Checks whether this arrow array equals the json array given as a
+ /// slice of borrowed json values.
+ fn equals_json(&self, json: &[&Value]) -> bool;
+
+ /// Checks whether this arrow array equals the json array given as a
+ /// slice of owned json values. Borrows every element and delegates to
+ /// `equals_json`.
+ fn equals_json_values(&self, json: &[Value]) -> bool {
+ let borrowed: Vec<&Value> = json.iter().collect();
+ self.equals_json(&borrowed)
+ }
+}
+
+/// Implement json equality for primitive arrays
+impl<T: ArrowPrimitiveType> JsonEqual for PrimitiveArray<T> {
+ /// Returns `true` when both sides have the same length and every slot
+ /// matches: a json `null` requires a null slot, any other json value
+ /// requires a valid slot whose `into_json_value()` equals it. A slot whose
+ /// `into_json_value()` is `None` never compares equal.
+ fn equals_json(&self, json: &[&Value]) -> bool {
+ if self.len() != json.len() {
+ return false;
+ }
+
+ json.iter().enumerate().all(|(i, item)| match *item {
+ Value::Null => self.is_null(i),
+ v => self.is_valid(i) && Some(v) == self.value(i).into_json_value().as_ref(),
+ })
+ }
+}
+
+impl<T: ArrowPrimitiveType> PartialEq<Value> for PrimitiveArray<T> {
+ /// Equal only when `json` is a json array whose elements match this array.
+ fn eq(&self, json: &Value) -> bool {
+ if let Value::Array(items) = json {
+ self.equals_json_values(items)
+ } else {
+ false
+ }
+ }
+}
+
+impl<T: ArrowPrimitiveType> PartialEq<PrimitiveArray<T>> for Value {
+ /// Equal only when `self` is a json array whose elements match `arrow`.
+ fn eq(&self, arrow: &PrimitiveArray<T>) -> bool {
+ if let Value::Array(items) = self {
+ arrow.equals_json_values(items)
+ } else {
+ false
+ }
+ }
+}
+
+impl JsonEqual for ListArray {
+ /// Returns `true` when both sides have the same length and every slot
+ /// matches: a json array requires a valid slot whose child slice equals
+ /// it, a json `null` requires a null slot or an empty list, and any other
+ /// json value never matches.
+ fn equals_json(&self, json: &[&Value]) -> bool {
+ if self.len() != json.len() {
+ return false;
+ }
+
+ json.iter().enumerate().all(|(i, item)| match *item {
+ Value::Array(v) => self.is_valid(i) && self.value(i).equals_json_values(v),
+ // A json null is accepted for a null slot and also for an empty list.
+ Value::Null => self.is_null(i) || self.value_length(i) == 0,
+ _ => false,
+ })
+ }
+}
+
+impl PartialEq<Value> for ListArray {
+ /// Equal only when `json` is a json array whose elements match this array.
+ fn eq(&self, json: &Value) -> bool {
+ if let Value::Array(items) = json {
+ self.equals_json_values(items)
+ } else {
+ false
+ }
+ }
+}
+
+impl PartialEq<ListArray> for Value {
+ /// Equal only when `self` is a json array whose elements match `arrow`.
+ fn eq(&self, arrow: &ListArray) -> bool {
+ if let Value::Array(items) = self {
+ arrow.equals_json_values(items)
+ } else {
+ false
+ }
+ }
+}
+
+impl JsonEqual for StructArray {
+ /// Returns `true` when both sides have the same length, every json row is
+ /// an object or `null`, and every column of this struct equals the json
+ /// values found under that column's name. A key that is missing from a
+ /// row (or a `null` row) is compared as json `null`.
+ fn equals_json(&self, json: &[&Value]) -> bool {
+ if self.len() != json.len() {
+ return false;
+ }
+
+ // Anything other than an object or null cannot represent a struct row.
+ let all_object = json.iter().all(|v| match v {
+ Object(_) | JNull => true,
+ _ => false,
+ });
+
+ if !all_object {
+ return false;
+ }
+
+ // Compare column by column, gathering each column's json values by name.
+ self.column_names().into_iter().all(|column_name| {
+ let json_values = json
+ .iter()
+ .map(|obj| obj.get(column_name).unwrap_or(&Value::Null))
+ .collect::<Vec<&Value>>();
+
+ self.column_by_name(column_name)
+ .map_or(false, |arr| arr.equals_json(&json_values))
+ })
+ }
+}
+
+impl PartialEq<Value> for StructArray {
+ /// Equal only when `json` is a json array whose rows match this array.
+ fn eq(&self, json: &Value) -> bool {
+ if let Value::Array(rows) = json {
+ self.equals_json_values(rows)
+ } else {
+ false
+ }
+ }
+}
+
+impl PartialEq<StructArray> for Value {
+ /// Equal only when `self` is a json array whose rows match `arrow`.
+ fn eq(&self, arrow: &StructArray) -> bool {
+ if let Value::Array(rows) = self {
+ arrow.equals_json_values(rows)
+ } else {
+ false
+ }
+ }
+}
+
+impl JsonEqual for BinaryArray {
+ /// Returns `true` when both sides have the same length and every slot
+ /// matches: a json string requires a valid slot holding the same bytes,
+ /// a json `null` requires a null slot, and any other json value never
+ /// matches.
+ fn equals_json(&self, json: &[&Value]) -> bool {
+ if self.len() != json.len() {
+ return false;
+ }
+
+ json.iter().enumerate().all(|(idx, item)| match *item {
+ JString(text) => self.is_valid(idx) && text.as_bytes() == self.value(idx),
+ JNull => self.is_null(idx),
+ _ => false,
+ })
+ }
+}
+
+impl PartialEq<Value> for BinaryArray {
+ /// Equal only when `json` is a json array whose elements match this array.
+ fn eq(&self, json: &Value) -> bool {
+ if let Value::Array(items) = json {
+ self.equals_json_values(items)
+ } else {
+ false
+ }
+ }
+}
+
+impl PartialEq<BinaryArray> for Value {
+ /// Equal only when `self` is a json array whose elements match `arrow`.
+ fn eq(&self, arrow: &BinaryArray) -> bool {
+ if let Value::Array(items) = self {
+ arrow.equals_json_values(items)
+ } else {
+ false
+ }
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -703,6 +870,336 @@ mod tests {
Ok(builder.finish())
}
+ #[test]
+ fn test_primitive_json_equal() {
+ // Equal arrays: must compare equal in both directions
+ let arrow_array = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ 1, null, 2, 3
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.eq(&json_array));
+ assert!(json_array.eq(&arrow_array));
+
+ // Unequal arrays: same length, but json holds a value where arrow holds null
+ let arrow_array = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ 1, 1, 2, 3
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+
+ // Unequal lengths: the json array is shorter than the arrow array
+ let arrow_array = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ 1, 1
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+
+ // Wrong json type: the json value is an object rather than an array
+ let arrow_array = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
+ let json_array: Value = serde_json::from_str(
+ r#"
+ {
+ "a": 1
+ }
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+ }
+
+ #[test]
+ fn test_list_json_equal() {
+ // Equal lists, including a null entry: must compare equal in both directions
+ let arrow_array = create_list_array(
+ &mut ListBuilder::new(Int32Builder::new(10)),
+ &[Some(&[1, 2, 3]), None, Some(&[4, 5, 6])],
+ )
+ .unwrap();
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ [1, 2, 3],
+ null,
+ [4, 5, 6]
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.eq(&json_array));
+ assert!(json_array.eq(&arrow_array));
+
+ // Unequal lists: json holds a list where the arrow array holds null
+ let arrow_array = create_list_array(
+ &mut ListBuilder::new(Int32Builder::new(10)),
+ &[Some(&[1, 2, 3]), None, Some(&[4, 5, 6])],
+ )
+ .unwrap();
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ [1, 2, 3],
+ [7, 8],
+ [4, 5, 6]
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+
+ // Wrong json type: the json value is an object rather than an array
+ let arrow_array = create_list_array(
+ &mut ListBuilder::new(Int32Builder::new(10)),
+ &[Some(&[1, 2, 3]), None, Some(&[4, 5, 6])],
+ )
+ .unwrap();
+ let json_array: Value = serde_json::from_str(
+ r#"
+ {
+ "a": 1
+ }
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+ }
+
+ #[test]
+ fn test_binary_json_equal() {
+ // Equal arrays, including null entries: must compare equal in both directions
+ let arrow_array = BinaryArray::try_from(vec![
+ Some("hello"),
+ None,
+ None,
+ Some("world"),
+ None,
+ None,
+ ])
+ .unwrap();
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ "hello",
+ null,
+ null,
+ "world",
+ null,
+ null
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.eq(&json_array));
+ assert!(json_array.eq(&arrow_array));
+
+ // Unequal arrays: same length, but one string differs
+ let arrow_array = BinaryArray::try_from(vec![
+ Some("hello"),
+ None,
+ None,
+ Some("world"),
+ None,
+ None,
+ ])
+ .unwrap();
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ "hello",
+ null,
+ null,
+ "arrow",
+ null,
+ null
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+
+ // Unequal lengths: the arrow array is one element shorter than the json array
+ let arrow_array =
+ BinaryArray::try_from(vec![Some("hello"), None, None, Some("world"), None])
+ .unwrap();
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ "hello",
+ null,
+ null,
+ "arrow",
+ null,
+ null
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+
+ // Wrong json type: the json value is an object rather than an array
+ let arrow_array =
+ BinaryArray::try_from(vec![Some("hello"), None, None, Some("world"), None])
+ .unwrap();
+ let json_array: Value = serde_json::from_str(
+ r#"
+ {
+ "a": 1
+ }
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+
+ // Wrong element type: the json array holds a number where a string is expected
+ let arrow_array =
+ BinaryArray::try_from(vec![Some("hello"), None, None, Some("world"), None])
+ .unwrap();
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ "hello",
+ null,
+ null,
+ 1,
+ null,
+ null
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+ }
+
+ #[test]
+ fn test_struct_json_equal() {
+ // Equal case: a missing key and a null row both stand in for null fields
+ let string_builder = BinaryBuilder::new(5);
+ let int_builder = Int32Builder::new(5);
+
+ let mut fields = Vec::new();
+ let mut field_builders = Vec::new();
+ fields.push(Field::new("f1", DataType::Utf8, false));
+ field_builders.push(Box::new(string_builder) as Box<ArrayBuilder>);
+ fields.push(Field::new("f2", DataType::Int32, false));
+ field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
+
+ let mut builder = StructBuilder::new(fields, field_builders);
+
+ let arrow_array = create_struct_array(
+ &mut builder,
+ &[Some("joe"), None, None, Some("mark"), Some("doe")],
+ &[Some(1), Some(2), None, Some(4), Some(5)],
+ &[true, true, false, true, true],
+ )
+ .unwrap();
+
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ {
+ "f1": "joe",
+ "f2": 1
+ },
+ {
+ "f2": 2
+ },
+ null,
+ {
+ "f1": "mark",
+ "f2": 4
+ },
+ {
+ "f1": "doe",
+ "f2": 5
+ }
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.eq(&json_array));
+ assert!(json_array.eq(&arrow_array));
+
+ // Unequal lengths: the json array has one row fewer than the arrow array
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ {
+ "f1": "joe",
+ "f2": 1
+ },
+ {
+ "f2": 2
+ },
+ null,
+ {
+ "f1": "mark",
+ "f2": 4
+ }
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+
+ // Wrong json type: the json value is a single object rather than an array
+ let json_array: Value = serde_json::from_str(
+ r#"
+ {
+ "f1": "joe",
+ "f2": 1
+ }
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+
+ // Not all rows are objects: one json row is a number
+ let json_array: Value = serde_json::from_str(
+ r#"
+ [
+ {
+ "f1": "joe",
+ "f2": 1
+ },
+ 2,
+ null,
+ {
+ "f1": "mark",
+ "f2": 4
+ }
+ ]
+ "#,
+ )
+ .unwrap();
+ assert!(arrow_array.ne(&json_array));
+ assert!(json_array.ne(&arrow_array));
+ }
+
fn create_struct_array<
'a,
T: AsRef<[Option<&'a str>]>,
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index e0b6d70..f167e4e 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -29,7 +29,7 @@ use std::str::FromStr;
use packed_simd::*;
use serde_derive::{Deserialize, Serialize};
-use serde_json::{json, Value};
+use serde_json::{json, Number, Value, Value::Number as VNumber};
use crate::error::{ArrowError, Result};
@@ -102,6 +102,7 @@ pub struct Field {
pub trait ArrowNativeType:
fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + 'static
{
+ /// Converts this native value into a json value. Implementations may
+ /// return `None`; the float implementations do so when
+ /// `Number::from_f64` yields `None`.
+ fn into_json_value(self) -> Option<Value>;
}
/// Trait indicating a primitive fixed-width type (bool, ints and floats).
@@ -121,17 +122,71 @@ pub trait ArrowPrimitiveType: 'static {
fn default_value() -> Self::Native;
}
-impl ArrowNativeType for bool {}
-impl ArrowNativeType for i8 {}
-impl ArrowNativeType for i16 {}
-impl ArrowNativeType for i32 {}
-impl ArrowNativeType for i64 {}
-impl ArrowNativeType for u8 {}
-impl ArrowNativeType for u16 {}
-impl ArrowNativeType for u32 {}
-impl ArrowNativeType for u64 {}
-impl ArrowNativeType for f32 {}
-impl ArrowNativeType for f64 {}
+impl ArrowNativeType for bool {
+ /// Converts the boolean into a json boolean; always returns `Some`.
+ fn into_json_value(self) -> Option<Value> {
+ Some(Value::Bool(self))
+ }
+}
+
+impl ArrowNativeType for i8 {
+ /// Converts the integer into a json number; always returns `Some`.
+ fn into_json_value(self) -> Option<Value> {
+ Some(VNumber(self.into()))
+ }
+}
+
+impl ArrowNativeType for i16 {
+ /// Converts the integer into a json number; always returns `Some`.
+ fn into_json_value(self) -> Option<Value> {
+ Some(VNumber(self.into()))
+ }
+}
+
+impl ArrowNativeType for i32 {
+ /// Converts the integer into a json number; always returns `Some`.
+ fn into_json_value(self) -> Option<Value> {
+ Some(VNumber(self.into()))
+ }
+}
+
+impl ArrowNativeType for i64 {
+ /// Converts the integer into a json number; always returns `Some`.
+ fn into_json_value(self) -> Option<Value> {
+ Some(VNumber(self.into()))
+ }
+}
+
+impl ArrowNativeType for u8 {
+ /// Converts the integer into a json number; always returns `Some`.
+ fn into_json_value(self) -> Option<Value> {
+ Some(VNumber(self.into()))
+ }
+}
+
+impl ArrowNativeType for u16 {
+ /// Converts the integer into a json number; always returns `Some`.
+ fn into_json_value(self) -> Option<Value> {
+ Some(VNumber(self.into()))
+ }
+}
+
+impl ArrowNativeType for u32 {
+ /// Converts the integer into a json number; always returns `Some`.
+ fn into_json_value(self) -> Option<Value> {
+ Some(VNumber(self.into()))
+ }
+}
+
+impl ArrowNativeType for u64 {
+ /// Converts the integer into a json number; always returns `Some`.
+ fn into_json_value(self) -> Option<Value> {
+ Some(VNumber(self.into()))
+ }
+}
+
+impl ArrowNativeType for f32 {
+ /// Widens the value to `f64` and converts it into a json number.
+ /// Returns `None` when `Number::from_f64` rejects the value.
+ fn into_json_value(self) -> Option<Value> {
+ Number::from_f64(self as f64).map(VNumber)
+ }
+}
+
+impl ArrowNativeType for f64 {
+ /// Converts the value into a json number.
+ /// Returns `None` when `Number::from_f64` rejects the value.
+ fn into_json_value(self) -> Option<Value> {
+ Number::from_f64(self).map(VNumber)
+ }
+}
macro_rules! make_type {
($name:ident, $native_ty:ty, $data_ty:expr, $bit_width:expr, $default_val:expr) => {
@@ -770,6 +825,9 @@ impl fmt::Display for Schema {
mod tests {
use super::*;
use serde_json;
+ use serde_json::Number;
+ use serde_json::Value::{Bool, Number as VNumber};
+ use std::f32::NAN;
#[test]
fn create_struct_type() {
@@ -1018,4 +1076,26 @@ mod tests {
assert!(schema2 != schema4);
assert!(schema3 != schema4);
}
+
+ #[test]
+ fn test_arrow_native_type_to_json() {
+ assert_eq!(Some(Bool(true)), true.into_json_value());
+ assert_eq!(Some(VNumber(Number::from(1))), 1i8.into_json_value());
+ assert_eq!(Some(VNumber(Number::from(1))), 1i16.into_json_value());
+ assert_eq!(Some(VNumber(Number::from(1))), 1i32.into_json_value());
+ assert_eq!(Some(VNumber(Number::from(1))), 1i64.into_json_value());
+ assert_eq!(Some(VNumber(Number::from(1))), 1u8.into_json_value());
+ assert_eq!(Some(VNumber(Number::from(1))), 1u16.into_json_value());
+ assert_eq!(Some(VNumber(Number::from(1))), 1u32.into_json_value());
+ assert_eq!(Some(VNumber(Number::from(1))), 1u64.into_json_value());
+ // The `f32` suffix is required: an unsuffixed float literal falls back
+ // to `f64`, which would leave the `f32` implementation untested here.
+ assert_eq!(
+ Some(VNumber(Number::from_f64(0.01f32 as f64).unwrap())),
+ 0.01f32.into_json_value()
+ );
+ assert_eq!(
+ Some(VNumber(Number::from_f64(0.01f64).unwrap())),
+ 0.01f64.into_json_value()
+ );
+ // NaN has no json number representation.
+ assert_eq!(None, NAN.into_json_value());
+ }
}