You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2020/07/07 02:51:28 UTC

[GitHub] [incubator-doris] morningman commented on a change in pull request #4020: [Load][Json] Refactor json load logic to make it more reasonable

morningman commented on a change in pull request #4020:
URL: https://github.com/apache/incubator-doris/pull/4020#discussion_r450582767



##########
File path: be/src/exec/json_scanner.cpp
##########
@@ -406,54 +459,60 @@ Status JsonReader::handle_simple_json(Tuple* tuple, const std::vector<SlotDescri
     return Status::OK();
 }
 
-Status JsonReader::set_tuple_value_from_map(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool *valid) {
+// for complex format json with strip_outer_array = false
+Status JsonReader::_set_tuple_value_from_jmap(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool *valid) {
     std::unordered_map<std::string, JsonDataInternal>::iterator it_map;
     for (auto v : slot_descs) {
         it_map = _jmap.find(v->col_name());
         if (it_map == _jmap.end()) {
-            return Status::RuntimeError("The column name of table is not foud in jsonpath.");
+            return Status::RuntimeError("The column name of table is not foud in jsonpath: " + v->col_name());
         }
-        rapidjson::Value::ConstValueIterator value = it_map->second.get_next();
+        rapidjson::Value* value = it_map->second.get_value();
         if (value == nullptr) {
             if (v->is_nullable()) {
                 tuple->set_null(v->null_indicator_offset());
             } else  {
                 std::stringstream str_error;
                 str_error << "The column `" << it_map->first << "` is not nullable, but it's not found in jsondata.";
-                _state->append_error_msg_to_file("", str_error.str());
+                _state->append_error_msg_to_file(_print_json_value(*value), str_error.str());
                 _counter->num_rows_filtered++;
                 *valid = false; // current row is invalid
                 break;
             }
         } else {
-            RETURN_IF_ERROR(write_data_to_tuple(value, v, tuple, tuple_pool));
+            _write_data_to_tuple(value, v, tuple, tuple_pool, valid);
+            if (!(*valid)) {
+                return Status::OK();
+            }
         }
     }
     *valid = true;
     return Status::OK();
 }
 
-Status JsonReader::handle_nest_complex_json(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof) {
+// _json_doc should be an object
+Status JsonReader::_handle_nested_complex_json(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof) {
     do {
         bool valid = false;
         if (_next_line >= _total_lines) {
-            RETURN_IF_ERROR(parse_json_doc(eof));
-            if (*eof) {
-                return Status::OK();
+            Status st = _parse_json_doc(eof);
+            if (st.is_data_quality_error()) {
+                continue; // continue to read next
             }
-            _total_lines = get_data_by_jsonpath(slot_descs);
-            if (_total_lines == -1) {
-                return Status::InternalError("Parse json data is failed.");
-            } else if (_total_lines == 0) {
-                *eof = true;
+            RETURN_IF_ERROR(st); // terminate if encounter other errors

Review comment:
       The file reader may return other errors.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org