You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/06/09 16:28:29 UTC

[GitHub] [arrow] vertexclique commented on a change in pull request #7379: ARROW-9062 [RUST] json reader dictionary support

vertexclique commented on a change in pull request #7379:
URL: https://github.com/apache/arrow/pull/7379#discussion_r437210110



##########
File path: rust/arrow/src/json/reader.rs
##########
@@ -448,42 +453,55 @@ impl<R: Read> Reader<R> {
                             let mut builder = ListBuilder::new(values_builder);
                             for row in rows {
                                 if let Some(value) = row.get(field.name()) {
-                                        // value can be an array or a scalar
-                                        let vals: Vec<Option<String>> = if let Value::String(v) = value {
-                                            vec![Some(v.to_string())]
-                                        } else if let Value::Array(n) = value {
-                                            n.iter().map(|v: &Value| {
-                                                if v.is_string() {
-                                                    Some(v.as_str().unwrap().to_string())
-                                                } else if v.is_array() || v.is_object() {
-                                                    // implicitly drop nested values
-                                                    // TODO support deep-nesting
-                                                    None
-                                                } else {
-                                                    Some(v.to_string())
-                                                }
-                                            }).collect()
-                                        } else if let Value::Null = value {
-                                            vec![None]
-                                        } else if !value.is_object() {
-                                            vec![Some(value.to_string())]
+                                    // value can be an array or a scalar
+                                    let vals: Vec<Option<String>> = if let Value::String(v) = value {
+                                        vec![Some(v.to_string())]
+                                    } else if let Value::Array(n) = value {
+                                        n.iter().map(|v: &Value| {
+                                            if v.is_string() {
+                                                Some(v.as_str().unwrap().to_string())
+                                            } else if v.is_array() || v.is_object() {
+                                                // implicitly drop nested values
+                                                // TODO support deep-nesting
+                                                None
+                                            } else {
+                                                Some(v.to_string())
+                                            }
+                                        }).collect()
+                                    } else if let Value::Null = value {
+                                        vec![None]
+                                    } else if !value.is_object() {
+                                        vec![Some(value.to_string())]
+                                    } else {
+                                        return Err(ArrowError::JsonError("Only scalars are currently supported in JSON arrays".to_string()));
+                                    };

Review comment:
       It would be better to write this as a `match` statement to make it more readable.

##########
File path: rust/arrow/src/json/reader.rs
##########
@@ -621,6 +639,32 @@ impl<R: Read> Reader<R> {
         }
         Ok(Arc::new(builder.finish()))
     }
+
+    fn build_dictionary_array<T: ArrowPrimitiveType>(
+        &self,
+        rows: &[Value],
+        col_name: &str,
+    ) -> Result<ArrayRef>
+    where
+        T::Native: num::NumCast,
+        T: ArrowDictionaryKeyType,
+    {
+        let key_builder = PrimitiveBuilder::<T>::new(rows.len());
+        let value_builder = StringBuilder::new(100);
+        let mut builder = StringDictionaryBuilder::new(key_builder, value_builder);
+        for row in rows {
+            if let Some(value) = row.get(&col_name) {
+                if let Some(str_v) = value.as_str() {
+                    builder.append(str_v).map(|_| ())?
+                } else {
+                    builder.append_null()?
+                }

Review comment:
       Consider using `map_or_else` here.

##########
File path: rust/arrow/src/json/reader.rs
##########
@@ -621,6 +639,32 @@ impl<R: Read> Reader<R> {
         }
         Ok(Arc::new(builder.finish()))
     }
+
+    fn build_dictionary_array<T: ArrowPrimitiveType>(
+        &self,
+        rows: &[Value],
+        col_name: &str,
+    ) -> Result<ArrayRef>
+    where
+        T::Native: num::NumCast,
+        T: ArrowDictionaryKeyType,
+    {
+        let key_builder = PrimitiveBuilder::<T>::new(rows.len());
+        let value_builder = StringBuilder::new(100);
+        let mut builder = StringDictionaryBuilder::new(key_builder, value_builder);
+        for row in rows {
+            if let Some(value) = row.get(&col_name) {
+                if let Some(str_v) = value.as_str() {
+                    builder.append(str_v).map(|_| ())?
+                } else {
+                    builder.append_null()?
+                }
+            } else {
+                builder.append_null()?
+            }

Review comment:
       `map_or_else` can be used here too.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org