You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2020/07/22 06:05:57 UTC

[GitHub] [incubator-doris] morningman commented on a change in pull request #4136: Fixbug: json load

morningman commented on a change in pull request #4136:
URL: https://github.com/apache/incubator-doris/pull/4136#discussion_r458546950



##########
File path: be/src/exec/json_scanner.h
##########
@@ -106,33 +105,31 @@ struct JsonPath;
 class JsonReader {
 public:
     JsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, FileReader* file_reader,
-            std::string& jsonpath, bool strip_outer_array);
+            bool strip_outer_array);
     ~JsonReader();
 
-    Status init(); // must call before use
+    Status init(std::string& jsonpath, std::string& json_root); // must call before use
 
     Status read(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof);
 
 private:
+    Status (JsonReader::*_handle_json_callback)(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof);
     Status _handle_simple_json(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof);
-    Status _handle_complex_json(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof);
     Status _handle_flat_array_complex_json(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof);
     Status _handle_nested_complex_json(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof);
 
     void _fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, int32_t len);
-    void _assemble_jmap(const std::vector<SlotDescriptor*>& slot_descs);
+    int _assemble_jmap(const std::vector<SlotDescriptor*>& slot_descs);

Review comment:
       Remove this method

##########
File path: be/src/exec/json_scanner.cpp
##########
@@ -197,30 +201,52 @@ JsonReader::~JsonReader() {
     _close();
 }
 
-Status JsonReader::init() {
+Status JsonReader::init(std::string& jsonpath, std::string& json_root) {
     // parse jsonpath
+    if (!jsonpath.empty()) {
+        Status st = _generater_json_paths(jsonpath, _parsed_jsonpaths);

Review comment:
       ```suggestion
           Status st = _generate_json_paths(jsonpath, _parsed_jsonpaths);
   ```

##########
File path: be/src/exec/json_scanner.cpp
##########
@@ -197,30 +201,52 @@ JsonReader::~JsonReader() {
     _close();
 }
 
-Status JsonReader::init() {
+Status JsonReader::init(std::string& jsonpath, std::string& json_root) {
     // parse jsonpath
+    if (!jsonpath.empty()) {
+        Status st = _generater_json_paths(jsonpath, _parsed_jsonpaths);
+        RETURN_IF_ERROR(st);
+    }
+    if (!json_root.empty()) {
+        std::vector<JsonPath> parsed_paths;
+        JsonFunctions::parse_json_paths(json_root, &parsed_paths);
+        _parsed_json_root.push_back(parsed_paths);

Review comment:
       We only support one json root, so we can check it here, and return error if json root more than one.

##########
File path: be/src/exec/json_scanner.h
##########
@@ -106,33 +105,31 @@ struct JsonPath;
 class JsonReader {
 public:
     JsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, FileReader* file_reader,
-            std::string& jsonpath, bool strip_outer_array);
+            bool strip_outer_array);
     ~JsonReader();
 
-    Status init(); // must call before use
+    Status init(std::string& jsonpath, std::string& json_root); // must call before use

Review comment:
       Can these parameters be changed to const &?

##########
File path: be/src/exec/json_scanner.cpp
##########
@@ -257,26 +283,38 @@ Status JsonReader::_parse_json_doc(bool* eof) {
         delete[] json_str;
         return Status::DataQualityError(str_error.str());
     }
+    delete[] json_str;
 
-    if (_json_doc.IsArray() && !_strip_outer_array) {
-        delete[] json_str;
+    // set json root
+    if (_parsed_json_root.size() != 0) {

Review comment:
       Why _parsed_json_root is a vector?

##########
File path: be/src/exec/json_scanner.h
##########
@@ -144,9 +141,9 @@ class JsonReader {
     RuntimeProfile::Counter* _bytes_read_counter;
     RuntimeProfile::Counter* _read_timer;
     std::vector<std::vector<JsonPath>> _parsed_jsonpaths;
+    std::vector<std::vector<JsonPath>> _parsed_json_root;
     rapidjson::Document _json_doc;
-    //key: column name
-    std::unordered_map<std::string, JsonDataInternal> _jmap;
+    rapidjson::Value *_json_root;

Review comment:
       change the name of `_json_root`. This name is easily confused with `_parsed_json_root`.
   My suggestion: `_final_json_doc`. And add comment to explain that this is generated from `_json_doc`




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org