You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/10/04 11:13:48 UTC

[GitHub] [arrow] alamb commented on a change in pull request #8333: ARROW-10167: [Rust] [DataFusion] Support DictionaryArray in sql.rs tests, by using standard pretty printer

alamb commented on a change in pull request #8333:
URL: https://github.com/apache/arrow/pull/8333#discussion_r499143816



##########
File path: rust/datafusion/tests/sql.rs
##########
@@ -122,8 +133,8 @@ async fn csv_count_star() -> Result<()> {
     let mut ctx = ExecutionContext::new();
     register_aggregate_csv(&mut ctx)?;
     let sql = "SELECT COUNT(*), COUNT(1), COUNT(c1) FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await.join("\n");
-    let expected = "100\t100\t100".to_string();

Review comment:
       this is a pretty good example of the change in structure. 

##########
File path: rust/arrow/src/util/pretty.rs
##########
@@ -83,11 +87,12 @@ macro_rules! make_string {
     }};
 }
 
-/// Get the value at the given row in an array as a string
-fn array_value_to_string(column: array::ArrayRef, row: usize) -> Result<String> {
+/// Get the value at the given row in an array as a String
+pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<String> {
     match column.data_type() {
         DataType::Utf8 => make_string!(array::StringArray, column, row),
         DataType::Boolean => make_string!(array::BooleanArray, column, row),
+        DataType::Int8 => make_string!(array::Int8Array, column, row),

Review comment:
       Note that this type was missing in pretty.rs (I found it while converting the tests in sql.rs).

##########
File path: rust/datafusion/tests/sql.rs
##########
@@ -638,107 +915,62 @@ fn register_alltypes_parquet(ctx: &mut ExecutionContext) {
     .unwrap();
 }
 
-/// Execute query and return result set as tab delimited string
-async fn execute(ctx: &mut ExecutionContext, sql: &str) -> Vec<String> {
+/// Execute query and return result set as 2-d table of Vecs
+/// `result[row][column]`
+async fn execute(ctx: &mut ExecutionContext, sql: &str) -> Vec<Vec<String>> {
     let plan = ctx.create_logical_plan(&sql).unwrap();
     let logical_schema = plan.schema();
     let plan = ctx.optimize(&plan).unwrap();
     let optimized_logical_schema = plan.schema();
     let plan = ctx.create_physical_plan(&plan).unwrap();
     let physical_schema = plan.schema();
+
     let results = ctx.collect(plan).await.unwrap();
 
     assert_eq!(logical_schema.as_ref(), optimized_logical_schema.as_ref());
     assert_eq!(logical_schema.as_ref(), physical_schema.as_ref());
 
-    result_str(&results)
+    result_vec(&results)
 }
 
-/// Converts an array's value at `row_index` to a string.
-fn array_str(array: &Arc<dyn Array>, row_index: usize) -> String {
-    if array.is_null(row_index) {
+/// Specialised String representation
+fn col_str(column: &ArrayRef, row_index: usize) -> String {
+    if column.is_null(row_index) {
         return "NULL".to_string();
     }
-    // beyond this point, we can assume that `array...downcast().value(row_index)` is valid,
-    // due to the `if` above.
-
-    match array.data_type() {
-        DataType::Int8 => {
-            let array = array.as_any().downcast_ref::<Int8Array>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::Int16 => {
-            let array = array.as_any().downcast_ref::<Int16Array>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::Int32 => {
-            let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::Int64 => {
-            let array = array.as_any().downcast_ref::<Int64Array>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::UInt8 => {
-            let array = array.as_any().downcast_ref::<UInt8Array>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::UInt16 => {
-            let array = array.as_any().downcast_ref::<UInt16Array>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::UInt32 => {
-            let array = array.as_any().downcast_ref::<UInt32Array>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::UInt64 => {
-            let array = array.as_any().downcast_ref::<UInt64Array>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::Float32 => {
-            let array = array.as_any().downcast_ref::<Float32Array>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::Float64 => {
-            let array = array.as_any().downcast_ref::<Float64Array>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::Utf8 => {
-            let array = array.as_any().downcast_ref::<StringArray>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::Boolean => {
-            let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
-            format!("{:?}", array.value(row_index))
-        }
-        DataType::FixedSizeList(_, n) => {
-            let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
-            let array = array.value(row_index);
 
-            let mut r = Vec::with_capacity(*n as usize);
-            for i in 0..*n {
-                r.push(array_str(&array, i as usize));
-            }
-            format!("[{}]", r.join(","))
+    // Special case ListArray as there is no pretty print support for it yet

Review comment:
       I did cheat somewhat here and punt on adding proper ListArray support to pretty.rs. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org