You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2020/05/17 17:05:34 UTC
[arrow] branch master updated: ARROW-8833: [Rust] Implement VALIDATE mode in integration tests
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ce15eb2 ARROW-8833: [Rust] Implement VALIDATE mode in integration tests
ce15eb2 is described below
commit ce15eb24755bd63f1fcf2d8132d75fb2957a12ae
Author: Andy Grove <an...@gmail.com>
AuthorDate: Sun May 17 11:05:12 2020 -0600
ARROW-8833: [Rust] Implement VALIDATE mode in integration tests
This follows on from https://github.com/apache/arrow/pull/7206, which should be merged first.
This PR partially implements the VALIDATE mode in arrow-json-integration-test.
Closes #7208 from andygrove/ARROW-8833
Authored-by: Andy Grove <an...@gmail.com>
Signed-off-by: Andy Grove <an...@gmail.com>
---
rust/integration-testing/README.md | 30 ++++++
.../src/bin/arrow-json-integration-test.rs | 103 ++++++++++++++-------
2 files changed, 97 insertions(+), 36 deletions(-)
diff --git a/rust/integration-testing/README.md b/rust/integration-testing/README.md
new file mode 100644
index 0000000..66248de
--- /dev/null
+++ b/rust/integration-testing/README.md
@@ -0,0 +1,30 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Apache Arrow Rust Integration Testing
+
+See [Integration.rst](../../docs/source/format/Integration.rst) for an overview of integration testing.
+
+This crate contains the following binaries, which are invoked by Archery during integration testing with other Arrow implementations.
+
+| Binary | Purpose |
+|--------|---------|
+| arrow-file-to-stream | Converts an Arrow file to an Arrow stream |
+| arrow-stream-to-file | Converts an Arrow stream to an Arrow file |
+| arrow-json-integration-test | Converts between Arrow and JSON formats |
diff --git a/rust/integration-testing/src/bin/arrow-json-integration-test.rs b/rust/integration-testing/src/bin/arrow-json-integration-test.rs
index d13cc21..5694de9 100644
--- a/rust/integration-testing/src/bin/arrow-json-integration-test.rs
+++ b/rust/integration-testing/src/bin/arrow-json-integration-test.rs
@@ -26,9 +26,11 @@ use arrow::array::{
UInt16Builder, UInt32Builder, UInt64Builder, UInt8Builder,
};
use arrow::datatypes::{DataType, Schema};
+use arrow::error::{ArrowError, Result};
use arrow::ipc::reader::FileReader;
use arrow::ipc::writer::FileWriter;
use arrow::record_batch::{RecordBatch, RecordBatchReader};
+
use hex::decode;
use std::env;
use std::fs::File;
@@ -81,40 +83,23 @@ fn main() {
}
}
-fn json_to_arrow(
- json_name: &str,
- arrow_name: &str,
- _verbose: bool,
-) -> Result<(), String> {
- let json_file = File::open(json_name).unwrap();
- let reader = BufReader::new(json_file);
-
- let arrow_json: Value = serde_json::from_reader(reader).unwrap();
-
- let schema = Arc::new(Schema::from(&arrow_json["schema"]).unwrap());
-
- let mut batches = vec![];
-
- for b in arrow_json["batches"].as_array().unwrap() {
- let json_batch: ArrowJsonBatch = serde_json::from_value(b.clone()).unwrap();
- let batch = record_batch_from_json(schema.clone(), json_batch)?;
- batches.push(batch);
- }
+fn json_to_arrow(json_name: &str, arrow_name: &str, _verbose: bool) -> Result<()> {
+ let (schema, batches) = read_json_file(json_name)?;
- let arrow_file = File::create(arrow_name).unwrap();
- let mut writer = FileWriter::try_new(arrow_file, schema.as_ref()).unwrap();
+ let arrow_file = File::create(arrow_name)?;
+ let mut writer = FileWriter::try_new(arrow_file, &schema)?;
for b in batches {
- writer.write(&b).unwrap();
+ writer.write(&b)?;
}
Ok(())
}
fn record_batch_from_json(
- schema: Arc<Schema>,
+ schema: &Schema,
json_batch: ArrowJsonBatch,
-) -> Result<RecordBatch, String> {
+) -> Result<RecordBatch> {
let mut columns = vec![];
for (field, json_col) in schema.fields().iter().zip(json_batch.columns) {
@@ -312,21 +297,22 @@ fn record_batch_from_json(
}
Arc::new(b.finish())
}
- t => return Err(format!("data type {:?} not supported", t)),
+ t => {
+ return Err(ArrowError::JsonError(format!(
+ "data type {:?} not supported",
+ t
+ )))
+ }
};
columns.push(col);
}
- RecordBatch::try_new(schema, columns).map_err(|e| e.to_string())
+ RecordBatch::try_new(Arc::new(schema.clone()), columns)
}
-fn arrow_to_json(
- arrow_name: &str,
- json_name: &str,
- _verbose: bool,
-) -> Result<(), String> {
- let arrow_file = File::open(arrow_name).unwrap();
- let mut reader = FileReader::try_new(arrow_file).unwrap();
+fn arrow_to_json(arrow_name: &str, json_name: &str, _verbose: bool) -> Result<()> {
+ let arrow_file = File::open(arrow_name)?;
+ let mut reader = FileReader::try_new(arrow_file)?;
let mut fields = vec![];
for f in reader.schema().fields() {
@@ -345,12 +331,57 @@ fn arrow_to_json(
dictionaries: None,
};
- let json_file = File::create(json_name).unwrap();
+ let json_file = File::create(json_name)?;
serde_json::to_writer(&json_file, &arrow_json).unwrap();
Ok(())
}
-fn validate(_arrow_name: &str, _json_name: &str, _verbose: bool) -> Result<(), String> {
- panic!("validate not implemented");
+fn validate(arrow_name: &str, json_name: &str, _verbose: bool) -> Result<()> {
+ // open JSON file
+ let (json_schema, json_batches) = read_json_file(json_name)?;
+
+ // open Arrow file
+ let arrow_file = File::open(arrow_name)?;
+ let mut arrow_reader = FileReader::try_new(arrow_file)?;
+ let arrow_schema = arrow_reader.schema().as_ref().to_owned();
+
+ // compare schemas
+ assert!(json_schema == arrow_schema);
+
+ for json_batch in &json_batches {
+ if let Some(arrow_batch) = arrow_reader.next_batch()? {
+ // compare batches
+ assert!(arrow_batch.num_columns() == json_batch.num_columns());
+ assert!(arrow_batch.num_rows() == json_batch.num_rows());
+
+ // TODO compare in more detail
+ } else {
+ return Err(ArrowError::ComputeError(
+ "no more arrow batches left".to_owned(),
+ ));
+ }
+ }
+
+ if let Some(_) = arrow_reader.next_batch()? {
+ return Err(ArrowError::ComputeError(
+ "no more json batches left".to_owned(),
+ ));
+ }
+
+ Ok(())
+}
+
+fn read_json_file(json_name: &str) -> Result<(Schema, Vec<RecordBatch>)> {
+ let json_file = File::open(json_name)?;
+ let reader = BufReader::new(json_file);
+ let arrow_json: Value = serde_json::from_reader(reader).unwrap();
+ let schema = Schema::from(&arrow_json["schema"])?;
+ let mut batches = vec![];
+ for b in arrow_json["batches"].as_array().unwrap() {
+ let json_batch: ArrowJsonBatch = serde_json::from_value(b.clone()).unwrap();
+ let batch = record_batch_from_json(&schema, json_batch)?;
+ batches.push(batch);
+ }
+ Ok((schema, batches))
}