You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/28 04:31:34 UTC

[doris] branch branch-1.2-lts updated: [Improvement](thirdparty)upgrade simdjson from 1.0.2 to 3.0.1 (#15412)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 8fbf5c5c16 [Improvement](thirdparty)upgrade simdjson from 1.0.2 to 3.0.1 (#15412)
8fbf5c5c16 is described below

commit 8fbf5c5c1632d5adfa8a17f92553c4e07b627d7e
Author: Kang <kx...@gmail.com>
AuthorDate: Wed Dec 28 12:24:16 2022 +0800

    [Improvement](thirdparty)upgrade simdjson from 1.0.2 to 3.0.1 (#15412)
    
    Upgrade simdjson from 1.0.2 to the latest version, 3.0.1, so that the -mlzcnt compiler flag no longer causes BE UT failures on macOS.
    simdjson is currently used only by VJsonScanner and is disabled by default, so the impact of the upgrade is limited.
---
 dist/LICENSE-dist.txt                              |  2 +-
 thirdparty/CHANGELOG.md                            |  3 ++
 thirdparty/download-thirdparty.sh                  |  4 +-
 .../{simdjson-1.0.2.patch => simdjson-3.0.1.patch} | 54 +++++++++++-----------
 thirdparty/vars.sh                                 |  8 ++--
 5 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/dist/LICENSE-dist.txt b/dist/LICENSE-dist.txt
index 4fe86da53f..837cfc0462 100644
--- a/dist/LICENSE-dist.txt
+++ b/dist/LICENSE-dist.txt
@@ -1529,7 +1529,7 @@ The Apache Software License, Version 2.0
     * cctz: 2.3
     * aws sdk: 1.9.211
     * benchmark: 1.5.6
-    * simdjson: 1.0.2
+    * simdjson: 3.0.1
     * libhdfs3: 2.3.0
     * libhdfs3: commit 5fccd36
     * opentelemetry-proto: 0.18.0
diff --git a/thirdparty/CHANGELOG.md b/thirdparty/CHANGELOG.md
index 2fd276f488..13f273ecfb 100644
--- a/thirdparty/CHANGELOG.md
+++ b/thirdparty/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 This file contains version of the third-party dependency libraries in the build-env image. The docker build-env image is apache/doris, and the tag is `build-env-${version}`
 
+## v20221228
+- Modified: simdjson 1.0.2 -> 3.0.1
+
 ## v20221015
 
 - Modified: zstd 1.5.0 -> 1.5.2
diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh
index f0bd2130b5..6cae1cc07d 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -397,10 +397,10 @@ cd -
 echo "Finished patching ${BRPC_SOURCE}"
 
 # patch jemalloc, change simdjson::dom::element_type::BOOL to BOOLEAN to avoid conflict with odbc macro BOOL
-if [[ "${SIMDJSON_SOURCE}" = "simdjson-1.0.2" ]]; then
+if [[ "${SIMDJSON_SOURCE}" = "simdjson-3.0.1" ]]; then
     cd "${TP_SOURCE_DIR}/${SIMDJSON_SOURCE}"
     if [[ ! -f "${PATCHED_MARK}" ]]; then
-        patch -p1 <"${TP_PATCH_DIR}/simdjson-1.0.2.patch"
+        patch -p1 <"${TP_PATCH_DIR}/simdjson-3.0.1.patch"
         touch "${PATCHED_MARK}"
     fi
     cd -
diff --git a/thirdparty/patches/simdjson-1.0.2.patch b/thirdparty/patches/simdjson-3.0.1.patch
similarity index 65%
rename from thirdparty/patches/simdjson-1.0.2.patch
rename to thirdparty/patches/simdjson-3.0.1.patch
index c2d1be717e..d6145b19af 100644
--- a/thirdparty/patches/simdjson-1.0.2.patch
+++ b/thirdparty/patches/simdjson-3.0.1.patch
@@ -1,6 +1,6 @@
-diff -ur a/fuzz/fuzz_dump.cpp b/fuzz/fuzz_dump.cpp
---- a/fuzz/fuzz_dump.cpp	2021-10-28 07:29:42.000000000 +0800
-+++ b/fuzz/fuzz_dump.cpp	2022-12-20 21:20:13.068613831 +0800
+diff -Naur a/fuzz/fuzz_dump.cpp b/fuzz/fuzz_dump.cpp
+--- a/fuzz/fuzz_dump.cpp	2022-11-23 23:59:48.000000000 +0800
++++ b/fuzz/fuzz_dump.cpp	2022-12-27 17:59:16.614067037 +0800
 @@ -48,7 +48,7 @@
    case simdjson::dom::element_type::STRING:
      os << element.get_string().value_unsafe() << endl;
@@ -10,9 +10,9 @@ diff -ur a/fuzz/fuzz_dump.cpp b/fuzz/fuzz_dump.cpp
      os << element.get_bool().value_unsafe() << endl;
      break;
    case simdjson::dom::element_type::NULL_VALUE:
-diff -ur a/include/simdjson/dom/element.h b/include/simdjson/dom/element.h
---- a/include/simdjson/dom/element.h	2021-10-28 07:29:42.000000000 +0800
-+++ b/include/simdjson/dom/element.h	2022-12-20 21:19:28.213840603 +0800
+diff -Naur a/include/simdjson/dom/element.h b/include/simdjson/dom/element.h
+--- a/include/simdjson/dom/element.h	2022-11-23 23:59:48.000000000 +0800
++++ b/include/simdjson/dom/element.h	2022-12-27 17:59:16.614067037 +0800
 @@ -27,7 +27,7 @@
    UINT64 = 'u',    ///< uint64_t: any integer that fits in uint64_t but *not* int64_t
    DOUBLE = 'd',    ///< double: Any number with a "." or "e" that fits in double.
@@ -22,19 +22,19 @@ diff -ur a/include/simdjson/dom/element.h b/include/simdjson/dom/element.h
    NULL_VALUE = 'n' ///< null
  };
  
-diff -ur a/include/simdjson/dom/element-inl.h b/include/simdjson/dom/element-inl.h
---- a/include/simdjson/dom/element-inl.h	2021-10-28 07:29:42.000000000 +0800
-+++ b/include/simdjson/dom/element-inl.h	2022-12-20 21:23:03.064754395 +0800
-@@ -187,7 +187,7 @@
- 
+diff -Naur a/include/simdjson/dom/element-inl.h b/include/simdjson/dom/element-inl.h
+--- a/include/simdjson/dom/element-inl.h	2022-11-23 23:59:48.000000000 +0800
++++ b/include/simdjson/dom/element-inl.h	2022-12-27 17:59:16.615067032 +0800
+@@ -188,7 +188,7 @@
  inline element_type element::type() const noexcept {
+   SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914
    auto tape_type = tape.tape_ref_type();
 -  return tape_type == internal::tape_type::FALSE_VALUE ? element_type::BOOL : static_cast<element_type>(tape_type);
 +  return tape_type == internal::tape_type::FALSE_VALUE ? element_type::BOOLEAN : static_cast<element_type>(tape_type);
  }
  
  inline simdjson_result<bool> element::get_bool() const noexcept {
-@@ -413,7 +413,7 @@
+@@ -425,7 +425,7 @@
        return out << "double";
      case element_type::STRING:
        return out << "string";
@@ -43,10 +43,10 @@ diff -ur a/include/simdjson/dom/element-inl.h b/include/simdjson/dom/element-inl
        return out << "bool";
      case element_type::NULL_VALUE:
        return out << "null";
-diff -ur a/singleheader/simdjson.h b/singleheader/simdjson.h
---- a/singleheader/simdjson.h	2021-10-28 07:29:42.000000000 +0800
-+++ b/singleheader/simdjson.h	2022-12-20 21:19:55.295703686 +0800
-@@ -5167,7 +5167,7 @@
+diff -Naur a/singleheader/simdjson.h b/singleheader/simdjson.h
+--- a/singleheader/simdjson.h	2022-11-23 23:59:48.000000000 +0800
++++ b/singleheader/simdjson.h	2022-12-27 17:59:16.619067010 +0800
+@@ -5301,7 +5301,7 @@
    UINT64 = 'u',    ///< uint64_t: any integer that fits in uint64_t but *not* int64_t
    DOUBLE = 'd',    ///< double: Any number with a "." or "e" that fits in double.
    STRING = '"',    ///< std::string_view
@@ -55,16 +55,16 @@ diff -ur a/singleheader/simdjson.h b/singleheader/simdjson.h
    NULL_VALUE = 'n' ///< null
  };
  
-@@ -7008,7 +7008,7 @@
- 
+@@ -7149,7 +7149,7 @@
  inline element_type element::type() const noexcept {
+   SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914
    auto tape_type = tape.tape_ref_type();
 -  return tape_type == internal::tape_type::FALSE_VALUE ? element_type::BOOL : static_cast<element_type>(tape_type);
 +  return tape_type == internal::tape_type::FALSE_VALUE ? element_type::BOOLEAN : static_cast<element_type>(tape_type);
  }
  
  inline simdjson_result<bool> element::get_bool() const noexcept {
-@@ -7234,7 +7234,7 @@
+@@ -7386,7 +7386,7 @@
        return out << "double";
      case element_type::STRING:
        return out << "string";
@@ -73,10 +73,10 @@ diff -ur a/singleheader/simdjson.h b/singleheader/simdjson.h
        return out << "bool";
      case element_type::NULL_VALUE:
        return out << "null";
-diff -ur a/tests/dom/basictests.cpp b/tests/dom/basictests.cpp
---- a/tests/dom/basictests.cpp	2021-10-28 07:29:42.000000000 +0800
-+++ b/tests/dom/basictests.cpp	2022-12-20 21:18:54.684010105 +0800
-@@ -1499,7 +1499,7 @@
+diff -Naur a/tests/dom/basictests.cpp b/tests/dom/basictests.cpp
+--- a/tests/dom/basictests.cpp	2022-11-23 23:59:48.000000000 +0800
++++ b/tests/dom/basictests.cpp	2022-12-27 17:59:16.619067010 +0800
+@@ -1567,7 +1567,7 @@
      simdjson_result<dom::element> result = parser.parse(ALL_TYPES_JSON)[key];
  
      return true
@@ -85,10 +85,10 @@ diff -ur a/tests/dom/basictests.cpp b/tests/dom/basictests.cpp
        && test_cast_error<dom::array>(result, INCORRECT_TYPE)
        && test_cast_error<dom::object>(result, INCORRECT_TYPE)
        && test_cast_error<std::string_view>(result, INCORRECT_TYPE)
-diff -ur a/tests/dom/readme_examples.cpp b/tests/dom/readme_examples.cpp
---- a/tests/dom/readme_examples.cpp	2021-10-28 07:29:42.000000000 +0800
-+++ b/tests/dom/readme_examples.cpp	2022-12-20 21:19:06.757949077 +0800
-@@ -204,7 +204,7 @@
+diff -Naur a/tests/dom/readme_examples.cpp b/tests/dom/readme_examples.cpp
+--- a/tests/dom/readme_examples.cpp	2022-11-23 23:59:48.000000000 +0800
++++ b/tests/dom/readme_examples.cpp	2022-12-27 17:59:16.619067010 +0800
+@@ -208,7 +208,7 @@
        case dom::element_type::STRING:
          cout << std::string_view(element) << endl;
          break;
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 12b3808c8f..0849a83528 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -388,10 +388,10 @@ XSIMD_SOURCE=xsimd-aeec9c872c8b475dedd7781336710f2dd2666cb2
 XSIMD_MD5SUM="d024855f71c0a2837a6918c0f8f66245"
 
 # simdjson
-SIMDJSON_DOWNLOAD="https://github.com/simdjson/simdjson/archive/refs/tags/v1.0.2.tar.gz"
-SIMDJSON_NAME=simdjson-1.0.2.tar.gz
-SIMDJSON_SOURCE=simdjson-1.0.2
-SIMDJSON_MD5SUM="5bb34cca7087a99c450dbdfe406bdc7d"
+SIMDJSON_DOWNLOAD="https://github.com/simdjson/simdjson/archive/refs/tags/v3.0.1.tar.gz"
+SIMDJSON_NAME=simdjson-3.0.1.tar.gz
+SIMDJSON_SOURCE=simdjson-3.0.1
+SIMDJSON_MD5SUM="993576b47249f2bade2bfb2552b2896a"
 
 # nlohmann_json
 NLOHMANN_JSON_DOWNLOAD="https://github.com/nlohmann/json/archive/refs/tags/v3.10.1.tar.gz"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org