You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by sz...@apache.org on 2023/02/17 00:21:50 UTC

[nifi-minifi-cpp] 04/04: MINIFICPP-2007 Add rocksdb compression options

This is an automated email from the ASF dual-hosted git repository.

szaszm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git

commit 2c7f989aea41d801ba22507b703664cae9dac1cc
Author: Gabor Gyimesi <ga...@gmail.com>
AuthorDate: Fri Feb 17 01:16:13 2023 +0100

    MINIFICPP-2007 Add rocksdb compression options
    
    - Add bundled zstd and lz4 thirdparty libraries
    - Upgrade rocksdb to version 7.7.3
    - Add compression options bzip2, zlib, zstd and lz4 on Unix and xpress
      on Windows
    
    Closes #1480
    Signed-off-by: Marton Szasz <sz...@apache.org>
---
 CMakeLists.txt                                     |   6 ++
 CONFIGURE.md                                       |   9 ++
 LICENSE                                            | 102 ++++++++++++++++-----
 NOTICE                                             |   2 +
 cmake/BundledRocksDB.cmake                         |  35 ++++++-
 cmake/BundledZLIB.cmake                            |   1 -
 cmake/LZ4.cmake                                    |  53 +++++++++++
 cmake/Zstd.cmake                                   |  56 +++++++++++
 cmake/lz4/dummy/Findlz4.cmake                      |  33 +++++++
 cmake/zstd/dummy/Findzstd.cmake                    |  33 +++++++
 conf/minifi.properties                             |   2 +
 extensions/libarchive/CMakeLists.txt               |   6 --
 .../rocksdb-repos/DatabaseContentRepository.cpp    |   5 +-
 extensions/rocksdb-repos/FlowFileRepository.cpp    |   5 +-
 extensions/rocksdb-repos/database/RocksDbUtils.cpp |  54 +++++++++++
 extensions/rocksdb-repos/database/RocksDbUtils.h   |  21 ++---
 .../rocksdb-repos/database/StringAppender.cpp      |   2 +-
 .../standard-processors/tests/unit/PutTCPTests.cpp |   4 +-
 libminifi/include/properties/Configuration.h       |   2 +
 libminifi/src/Configuration.cpp                    |   2 +
 ...e_gcc_clang_compiler_options_from_windows.patch |  37 ++++++++
 21 files changed, 420 insertions(+), 50 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 513afe7ae..3eac599b1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -214,6 +214,12 @@ else()
     message(VERBOSE "No custom malloc implementation")
 endif()
 
+if (NOT DISABLE_BZIP2 AND (NOT DISABLE_LIBARCHIVE OR (NOT DISABLE_ROCKSDB AND NOT WIN32)))  # bundle bzip2 when libarchive is enabled, or when rocksdb is enabled on a non-Windows platform
+    include(BundledBZip2)
+    use_bundled_bzip2(${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR})
+    list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/bzip2/dummy")  # presumably holds a dummy find module for the bundled bzip2 - confirm
+endif()
+
 if(NOT WIN32)
     if (ENABLE_JNI)
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_JNI")
diff --git a/CONFIGURE.md b/CONFIGURE.md
index 304407ecb..c9beff2c1 100644
--- a/CONFIGURE.md
+++ b/CONFIGURE.md
@@ -162,6 +162,15 @@ folder. You may specify your own path in place of these defaults.
     nifi.flowfile.repository.directory.default=${MINIFI_HOME}/flowfile_repository
     nifi.database.content.repository.directory.default=${MINIFI_HOME}/content_repository
 
+### Configuring compression for rocksdb database
+
+Rocksdb has an option to set compression type for its database to use less disk space.
+If content repository or flow file repository is set to use the rocksdb database as their storage, then we have the option to compress those repositories. On Unix operating systems `zlib`, `bzip2`, `zstd`, `lz4` and `lz4hc` compression types and on Windows `xpress` compression type is supported by MiNiFi C++. If the property is set to `auto` then `xpress` will be used on Windows, `zstd` on Unix operating systems. These options can be set in the minifi.properties file with the following pr [...]
+
+     in minifi.properties
+     nifi.flowfile.repository.rocksdb.compression=zlib
+     nifi.content.repository.rocksdb.compression=auto
+
 #### Shared database
 
 It is also possible to use a single database to store multiple repositories with the `minifidb://` scheme.
diff --git a/LICENSE b/LICENSE
index 823ac8688..e2697b9d3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -3407,32 +3407,90 @@ For these and/or other purposes and motivations, and without any expectation of
     Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
 ```
 
---------------------------------------------------------------------------
-
 This product bundles 'prometheus-cpp' which is available under an MIT license.
 
-  MIT License
+    MIT License
 
-  Copyright (c) 2016-2021 Jupp Mueller
-  Copyright (c) 2017-2022 Gregor Jasny
+    Copyright (c) 2016-2021 Jupp Mueller
+    Copyright (c) 2017-2022 Gregor Jasny
 
-  And many contributors, see
-  https://github.com/jupp0r/prometheus-cpp/graphs/contributors
+    And many contributors, see
+    https://github.com/jupp0r/prometheus-cpp/graphs/contributors
 
-  Permission is hereby granted, free of charge, to any person obtaining a copy
-  of this software and associated documentation files (the "Software"), to deal
-  in the Software without restriction, including without limitation the rights
-  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-  copies of the Software, and to permit persons to whom the Software is
-  furnished to do so, subject to the following conditions:
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
 
-  The above copyright notice and this permission notice shall be included in all
-  copies or substantial portions of the Software.
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
 
-  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-  SOFTWARE.
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE.
+
+This product bundles 'Zstandard' which is available under a BSD License.
+
+    BSD License
+
+    For Zstandard software
+
+    Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without modification,
+    are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this
+      list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    * Neither the name Facebook nor the names of its contributors may be used to
+      endorse or promote products derived from this software without specific
+      prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+    ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+    ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+This product bundles 'LZ4 Library' which is available under a BSD 2-Clause license.
+
+    LZ4 Library
+    Copyright (c) 2011-2020, Yann Collet
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without modification,
+    are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this
+      list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice, this
+      list of conditions and the following disclaimer in the documentation and/or
+      other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+    ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+    ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/NOTICE b/NOTICE
index 7502e6c9e..60476b637 100644
--- a/NOTICE
+++ b/NOTICE
@@ -68,6 +68,8 @@ This software includes third party software subject to the following copyrights:
 - abseil-cpp - Google Inc.
 - crc32c - Google Inc., Fangming Fang, Vadim Skipin, Rodrigo Tobar, Harry Mallon
 - prometheus-cpp - Copyright (c) 2016-2021 Jupp Mueller, Copyright (c) 2017-2022 Gregor Jasny
+- Zstandard - Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
+- LZ4 Library - Copyright (c) 2011-2020, Yann Collet
 
 The licenses for these third party components are included in LICENSE.txt
 
diff --git a/cmake/BundledRocksDB.cmake b/cmake/BundledRocksDB.cmake
index 989204ebc..b44855ee1 100644
--- a/cmake/BundledRocksDB.cmake
+++ b/cmake/BundledRocksDB.cmake
@@ -18,6 +18,14 @@
 function(use_bundled_rocksdb SOURCE_DIR BINARY_DIR)
     message("Using bundled RocksDB")
 
+    if (NOT WIN32)
+        include(Zstd)
+        list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/zstd/dummy")
+
+        include(LZ4)
+        list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/lz4/dummy")
+    endif()
+
     # Define byproducts
     if (WIN32)
         set(BYPRODUCT "lib/rocksdb.lib")
@@ -43,18 +51,29 @@ function(use_bundled_rocksdb SOURCE_DIR BINARY_DIR)
         list(APPEND ROCKSDB_CMAKE_ARGS -DPORTABLE=ON)
     endif()
     if(WIN32)
-        list(APPEND ROCKSDB_CMAKE_ARGS -DROCKSDB_INSTALL_ON_WINDOWS=ON)
+        list(APPEND ROCKSDB_CMAKE_ARGS
+                -DROCKSDB_INSTALL_ON_WINDOWS=ON
+                -DWITH_XPRESS=ON)
+    else()
+        list(APPEND ROCKSDB_CMAKE_ARGS
+                -DWITH_ZLIB=ON
+                -DWITH_BZ2=ON
+                -DWITH_ZSTD=ON
+                -DWITH_LZ4=ON)
     endif()
 
+    append_third_party_passthrough_args(ROCKSDB_CMAKE_ARGS "${ROCKSDB_CMAKE_ARGS}")
+
     # Build project
     ExternalProject_Add(
             rocksdb-external
-            URL "https://github.com/facebook/rocksdb/archive/refs/tags/v6.29.5.tar.gz"
-            URL_HASH "SHA256=ddbf84791f0980c0bbce3902feb93a2c7006f6f53bfd798926143e31d4d756f0"
+            URL "https://github.com/facebook/rocksdb/archive/refs/tags/v7.7.3.tar.gz"
+            URL_HASH "SHA256=b8ac9784a342b2e314c821f6d701148912215666ac5e9bdbccd93cf3767cb611"
             SOURCE_DIR "${BINARY_DIR}/thirdparty/rocksdb-src"
             CMAKE_ARGS ${ROCKSDB_CMAKE_ARGS}
             BUILD_BYPRODUCTS "${BINARY_DIR}/thirdparty/rocksdb-install/${BYPRODUCT}"
             EXCLUDE_FROM_ALL TRUE
+            LIST_SEPARATOR % # This is needed for passing semicolon-separated lists
     )
 
     # Set variables
@@ -66,11 +85,17 @@ function(use_bundled_rocksdb SOURCE_DIR BINARY_DIR)
     # Create imported targets
     add_library(RocksDB::RocksDB STATIC IMPORTED)
     set_target_properties(RocksDB::RocksDB PROPERTIES IMPORTED_LOCATION "${ROCKSDB_LIBRARY}")
+    if (NOT WIN32)
+        add_dependencies(rocksdb-external ZLIB::ZLIB BZip2::BZip2 zstd::zstd lz4::lz4)
+    endif()
     add_dependencies(RocksDB::RocksDB rocksdb-external)
     file(MAKE_DIRECTORY ${ROCKSDB_INCLUDE_DIR})
-    set_property(TARGET RocksDB::RocksDB APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${ROCKSDB_INCLUDE_DIR})
+    target_include_directories(RocksDB::RocksDB INTERFACE ${ROCKSDB_INCLUDE_DIR})
     set_property(TARGET RocksDB::RocksDB APPEND PROPERTY INTERFACE_LINK_LIBRARIES Threads::Threads)
+    target_link_libraries(RocksDB::RocksDB INTERFACE Threads::Threads)
     if(WIN32)
-        set_property(TARGET RocksDB::RocksDB APPEND PROPERTY INTERFACE_LINK_LIBRARIES Rpcrt4.lib)
+        target_link_libraries(RocksDB::RocksDB INTERFACE Rpcrt4.lib Cabinet.lib)
+    else()
+        target_link_libraries(RocksDB::RocksDB INTERFACE ZLIB::ZLIB BZip2::BZip2 zstd::zstd lz4::lz4)
     endif()
 endfunction(use_bundled_rocksdb)
diff --git a/cmake/BundledZLIB.cmake b/cmake/BundledZLIB.cmake
index 8d2d3542f..16b77c907 100644
--- a/cmake/BundledZLIB.cmake
+++ b/cmake/BundledZLIB.cmake
@@ -69,6 +69,5 @@ function(use_bundled_zlib SOURCE_DIR BINARY_DIR)
     add_library(ZLIB::ZLIB STATIC IMPORTED)
     set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION "${ZLIB_LIBRARIES}")
     add_dependencies(ZLIB::ZLIB zlib-external)
-    file(MAKE_DIRECTORY ${ZLIB_INCLUDE_DIRS})
     set_property(TARGET ZLIB::ZLIB APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${ZLIB_INCLUDE_DIRS})
 endfunction(use_bundled_zlib)
diff --git a/cmake/LZ4.cmake b/cmake/LZ4.cmake
new file mode 100644
index 000000000..40260740b
--- /dev/null
+++ b/cmake/LZ4.cmake
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include(FetchContent)  # LZ4 v1.9.4 is fetched and added to this build as a subdirectory, then exposed as lz4::lz4
+
+set(LZ4_BUILD_CLI OFF CACHE BOOL "" FORCE)
+set(LZ4_BUILD_LEGACY_LZ4C OFF CACHE BOOL "" FORCE)
+set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)  # NOTE(review): BUILD_SHARED_LIBS is not lz4-specific, forcing it in the cache may affect other targets - confirm intended
+set(BUILD_STATIC_LIBS ON CACHE BOOL "" FORCE)
+
+FetchContent_Declare(lz4
+    URL            https://github.com/lz4/lz4/archive/refs/tags/v1.9.4.tar.gz
+    URL_HASH       SHA256=0b0e3aa07c8c063ddf40b082bdf7e37a1562bda40a0ff5272957f3e987e0e54b
+)
+
+# With CMake >= 3.18, this block could be replaced with FetchContent_MakeAvailable(lz4),
+# if we add the `SOURCE_SUBDIR build/cmake` option to FetchContent_Declare() [this option is not available in CMake < 3.18].
+# As of July 2022, one of our supported platforms, Centos 7, comes with CMake 3.17.
+FetchContent_GetProperties(lz4)
+if(NOT lz4_POPULATED)  # populate and add the subdirectory only once
+    FetchContent_Populate(lz4)
+    # the top level doesn't contain CMakeLists.txt, it is in the "build/cmake" subdirectory
+    add_subdirectory(${lz4_SOURCE_DIR}/build/cmake ${lz4_BINARY_DIR})
+endif()
+
+add_library(lz4::lz4 ALIAS lz4_static)  # lz4_static is presumably the static library target created by lz4's own CMakeLists - confirm
+
+# Set variables: forced cache entries describing the fetched LZ4 headers and the expected static library location
+set(LZ4_FOUND "YES" CACHE STRING "" FORCE)
+set(LZ4_INCLUDE_DIRS "${lz4_SOURCE_DIR}/lib" CACHE STRING "" FORCE)
+if (WIN32)
+    set(LZ4_LIBRARIES "${lz4_BINARY_DIR}/lib/${CMAKE_BUILD_TYPE}/lz4_static.lib" CACHE STRING "" FORCE)  # NOTE(review): CMAKE_BUILD_TYPE can be empty with multi-config generators - confirm this path
+else()
+    set(LZ4_LIBRARIES "${lz4_BINARY_DIR}/liblz4.a" CACHE STRING "" FORCE)
+endif()
+
+# Set exported variables for FindPackage.cmake: appends -DEXPORTED_LZ4_* definitions to PASSTHROUGH_VARIABLES, the names read by cmake/lz4/dummy/Findlz4.cmake
+set(PASSTHROUGH_VARIABLES ${PASSTHROUGH_VARIABLES} "-DEXPORTED_LZ4_INCLUDE_DIRS=${LZ4_INCLUDE_DIRS}" CACHE STRING "" FORCE)
+set(PASSTHROUGH_VARIABLES ${PASSTHROUGH_VARIABLES} "-DEXPORTED_LZ4_LIBRARIES=${LZ4_LIBRARIES}" CACHE STRING "" FORCE)
diff --git a/cmake/Zstd.cmake b/cmake/Zstd.cmake
new file mode 100644
index 000000000..7d8a2313a
--- /dev/null
+++ b/cmake/Zstd.cmake
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include(FetchContent)  # zstd v1.5.2 is fetched and added to this build as a subdirectory, then exposed as zstd::zstd
+
+set(ZSTD_BUILD_SHARED OFF CACHE BOOL "" FORCE)
+
+if (WIN32)
+    set(PATCH_FILE "${CMAKE_SOURCE_DIR}/thirdparty/zstd/exclude_gcc_clang_compiler_options_from_windows.patch")
+    set(PC "${Patch_EXECUTABLE}" -p1 -i "${PATCH_FILE}")  # Patch_EXECUTABLE is not set in this file, presumably provided by find_package(Patch) elsewhere - confirm
+endif()
+
+FetchContent_Declare(zstd
+    URL            https://github.com/facebook/zstd/archive/refs/tags/v1.5.2.tar.gz
+    URL_HASH       SHA256=f7de13462f7a82c29ab865820149e778cbfe01087b3a55b5332707abf9db4a6e
+    PATCH_COMMAND "${PC}"  # PC is only set above when WIN32, so no patch command line is built here on other platforms
+)
+
+# With CMake >= 3.18, this block could be replaced with FetchContent_MakeAvailable(zstd),
+# if we add the `SOURCE_SUBDIR build/cmake` option to FetchContent_Declare() [this option is not available in CMake < 3.18].
+# As of July 2022, one of our supported platforms, Centos 7, comes with CMake 3.17.
+FetchContent_GetProperties(zstd)
+if(NOT zstd_POPULATED)  # populate and add the subdirectory only once
+    FetchContent_Populate(zstd)
+    # the top level doesn't contain CMakeLists.txt, it is in the "build/cmake" subdirectory
+    add_subdirectory(${zstd_SOURCE_DIR}/build/cmake ${zstd_BINARY_DIR})
+endif()
+
+add_library(zstd::zstd ALIAS libzstd_static)  # libzstd_static is presumably the static library target created by zstd's own CMakeLists - confirm
+
+# Set variables: forced cache entries describing the fetched zstd headers and the expected static library location
+set(ZSTD_FOUND "YES" CACHE STRING "" FORCE)
+set(ZSTD_INCLUDE_DIRS "${zstd_SOURCE_DIR}/lib" CACHE STRING "" FORCE)
+if (WIN32)
+    set(ZSTD_LIBRARIES "${zstd_BINARY_DIR}/lib/${CMAKE_BUILD_TYPE}/zstd_static.lib" CACHE STRING "" FORCE)  # NOTE(review): CMAKE_BUILD_TYPE can be empty with multi-config generators - confirm this path
+else()
+    set(ZSTD_LIBRARIES "${zstd_BINARY_DIR}/lib/libzstd.a" CACHE STRING "" FORCE)
+endif()
+
+# Set exported variables for FindPackage.cmake: appends -DEXPORTED_ZSTD_* definitions to PASSTHROUGH_VARIABLES, the names read by cmake/zstd/dummy/Findzstd.cmake
+set(PASSTHROUGH_VARIABLES ${PASSTHROUGH_VARIABLES} "-DEXPORTED_ZSTD_INCLUDE_DIRS=${ZSTD_INCLUDE_DIRS}" CACHE STRING "" FORCE)
+set(PASSTHROUGH_VARIABLES ${PASSTHROUGH_VARIABLES} "-DEXPORTED_ZSTD_LIBRARIES=${ZSTD_LIBRARIES}" CACHE STRING "" FORCE)
diff --git a/cmake/lz4/dummy/Findlz4.cmake b/cmake/lz4/dummy/Findlz4.cmake
new file mode 100644
index 000000000..b5ab2e66e
--- /dev/null
+++ b/cmake/lz4/dummy/Findlz4.cmake
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Dummy lz4 find module for the bundled version: fills the LZ4_* cache variables from EXPORTED_LZ4_* and defines lz4::lz4 if it does not exist yet
+if(NOT LZ4_FOUND)  # skip when LZ4_FOUND is already true
+    set(LZ4_FOUND "YES" CACHE STRING "" FORCE)
+    set(LZ4_INCLUDE_DIR "${EXPORTED_LZ4_INCLUDE_DIRS}" CACHE STRING "" FORCE)  # singular and plural names both receive the same value
+    set(LZ4_INCLUDE_DIRS "${EXPORTED_LZ4_INCLUDE_DIRS}" CACHE STRING "" FORCE)
+    set(LZ4_LIBRARIES "${EXPORTED_LZ4_LIBRARIES}" CACHE STRING "" FORCE)
+endif()
+
+if(NOT TARGET lz4::lz4)  # create the imported target only if no lz4::lz4 target is defined yet
+    add_library(lz4::lz4 STATIC IMPORTED)
+    set_target_properties(lz4::lz4 PROPERTIES
+            INTERFACE_INCLUDE_DIRECTORIES "${LZ4_INCLUDE_DIRS}")
+    set_target_properties(lz4::lz4 PROPERTIES
+            IMPORTED_LINK_INTERFACE_LANGUAGES "C"
+            IMPORTED_LOCATION "${LZ4_LIBRARIES}")
+endif()
diff --git a/cmake/zstd/dummy/Findzstd.cmake b/cmake/zstd/dummy/Findzstd.cmake
new file mode 100644
index 000000000..4d857e32c
--- /dev/null
+++ b/cmake/zstd/dummy/Findzstd.cmake
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Dummy zstd find module for the bundled version: fills zstd_FOUND and the ZSTD_* cache variables from EXPORTED_ZSTD_* and defines zstd::zstd if it does not exist yet
+if(NOT zstd_FOUND)  # skip when zstd_FOUND is already true
+    set(zstd_FOUND "YES" CACHE STRING "" FORCE)
+    set(ZSTD_INCLUDE_DIR "${EXPORTED_ZSTD_INCLUDE_DIRS}" CACHE STRING "" FORCE)  # singular and plural names both receive the same value
+    set(ZSTD_INCLUDE_DIRS "${EXPORTED_ZSTD_INCLUDE_DIRS}" CACHE STRING "" FORCE)
+    set(ZSTD_LIBRARIES "${EXPORTED_ZSTD_LIBRARIES}" CACHE STRING "" FORCE)
+endif()
+
+if(NOT TARGET zstd::zstd)  # create the imported target only if no zstd::zstd target is defined yet
+    add_library(zstd::zstd STATIC IMPORTED)
+    set_target_properties(zstd::zstd PROPERTIES
+            INTERFACE_INCLUDE_DIRECTORIES "${ZSTD_INCLUDE_DIRS}")
+    set_target_properties(zstd::zstd PROPERTIES
+            IMPORTED_LINK_INTERFACE_LANGUAGES "C"
+            IMPORTED_LOCATION "${ZSTD_LIBRARIES}")
+endif()
diff --git a/conf/minifi.properties b/conf/minifi.properties
index 491a94016..856de8fb1 100644
--- a/conf/minifi.properties
+++ b/conf/minifi.properties
@@ -28,9 +28,11 @@ nifi.provenance.repository.directory.default=${MINIFI_HOME}/provenance_repositor
 nifi.provenance.repository.max.storage.time=1 MIN
 nifi.provenance.repository.max.storage.size=1 MB
 nifi.flowfile.repository.directory.default=${MINIFI_HOME}/flowfile_repository
+# nifi.flowfile.repository.rocksdb.compression=auto
 nifi.database.content.repository.directory.default=${MINIFI_HOME}/content_repository
 nifi.provenance.repository.class.name=NoOpRepository
 nifi.content.repository.class.name=DatabaseContentRepository
+# nifi.content.repository.rocksdb.compression=auto
 
 #nifi.remote.input.secure=true
 #nifi.security.need.ClientAuth=
diff --git a/extensions/libarchive/CMakeLists.txt b/extensions/libarchive/CMakeLists.txt
index 674306dc7..2b89d3ea7 100644
--- a/extensions/libarchive/CMakeLists.txt
+++ b/extensions/libarchive/CMakeLists.txt
@@ -27,12 +27,6 @@ if (NOT DISABLE_LZMA)
     list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/liblzma/dummy")
 endif()
 
-if (NOT DISABLE_BZIP2)
-    include(BundledBZip2)
-    use_bundled_bzip2(${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR})
-    list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/bzip2/dummy")
-endif()
-
 include(BundledLibArchive)
 use_bundled_libarchive(${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR})
 
diff --git a/extensions/rocksdb-repos/DatabaseContentRepository.cpp b/extensions/rocksdb-repos/DatabaseContentRepository.cpp
index 881de1085..9beeb2498 100644
--- a/extensions/rocksdb-repos/DatabaseContentRepository.cpp
+++ b/extensions/rocksdb-repos/DatabaseContentRepository.cpp
@@ -53,10 +53,13 @@ bool DatabaseContentRepository::initialize(const std::shared_ptr<minifi::Configu
       db_opts.set(&rocksdb::DBOptions::env, rocksdb::Env::Default());
     }
   };
-  auto set_cf_opts = [] (rocksdb::ColumnFamilyOptions& cf_opts){
+  auto set_cf_opts = [&configuration] (rocksdb::ColumnFamilyOptions& cf_opts) {
     cf_opts.OptimizeForPointLookup(4);
     cf_opts.merge_operator = std::make_shared<StringAppender>();
     cf_opts.max_successive_merges = 0;
+    if (auto compression_type = minifi::internal::readConfiguredCompressionType(configuration, Configure::nifi_content_repository_rocksdb_compression)) {
+      cf_opts.compression = *compression_type;
+    }
   };
   db_ = minifi::internal::RocksDatabase::create(set_db_opts, set_cf_opts, directory_);
   if (db_->open()) {
diff --git a/extensions/rocksdb-repos/FlowFileRepository.cpp b/extensions/rocksdb-repos/FlowFileRepository.cpp
index ecb9a1bcc..8637b4282 100644
--- a/extensions/rocksdb-repos/FlowFileRepository.cpp
+++ b/extensions/rocksdb-repos/FlowFileRepository.cpp
@@ -220,11 +220,14 @@ bool FlowFileRepository::initialize(const std::shared_ptr<Configure> &configure)
   // To avoid DB write issues during heavy load it's recommended to have high number of buffer.
   // Rocksdb's stall feature can also trigger in case the number of buffers is >= 3.
   // The more buffers we have the more memory rocksdb can utilize without significant memory consumption under low load.
-  auto cf_options = [] (rocksdb::ColumnFamilyOptions& cf_opts) {
+  auto cf_options = [&configure] (rocksdb::ColumnFamilyOptions& cf_opts) {
     cf_opts.OptimizeForPointLookup(4);
     cf_opts.write_buffer_size = 8ULL << 20U;
     cf_opts.max_write_buffer_number = 20;
     cf_opts.min_write_buffer_number_to_merge = 1;
+    if (auto compression_type = minifi::internal::readConfiguredCompressionType(configure, Configure::nifi_flow_repository_rocksdb_compression)) {
+      cf_opts.compression = *compression_type;
+    }
   };
   db_ = minifi::internal::RocksDatabase::create(db_options, cf_options, directory_);
   if (db_->open()) {
diff --git a/extensions/rocksdb-repos/database/RocksDbUtils.cpp b/extensions/rocksdb-repos/database/RocksDbUtils.cpp
new file mode 100644
index 000000000..d3834dfef
--- /dev/null
+++ b/extensions/rocksdb-repos/database/RocksDbUtils.cpp
@@ -0,0 +1,54 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "RocksDbUtils.h"
+
+#include <string>
+
+#include "Exception.h"
+
+namespace org::apache::nifi::minifi::internal {
+
+std::optional<rocksdb::CompressionType> readConfiguredCompressionType(const std::shared_ptr<Configure> &configuration, const std::string& config_key) {  // maps the string stored under config_key to a rocksdb::CompressionType
+  std::string value;
+  if (!configuration->get(config_key, value) || value.empty()) {
+    return std::nullopt;  // get() failed or the value is empty: no compression type is reported
+  }
+#ifdef WIN32  // when WIN32 is defined, only "auto" and "xpress" are accepted
+  if (value == "auto" || value == "xpress") {
+    return rocksdb::CompressionType::kXpressCompression;
+  } else {
+    throw Exception(REPOSITORY_EXCEPTION, "RocksDB compression type not supported: " + value);  // any other value is rejected
+  }
+#else
+  if (value == "zlib") {
+    return rocksdb::CompressionType::kZlibCompression;
+  } else if (value == "bzip2") {
+    return rocksdb::CompressionType::kBZip2Compression;
+  } else if (value == "auto" || value == "zstd") {  // "auto" selects zstd in this branch
+    return rocksdb::CompressionType::kZSTD;
+  } else if (value == "lz4") {
+    return rocksdb::CompressionType::kLZ4Compression;
+  } else if (value == "lz4hc") {
+    return rocksdb::CompressionType::kLZ4HCCompression;
+  } else {
+    throw Exception(REPOSITORY_EXCEPTION, "RocksDB compression type not supported: " + value);  // any other value is rejected
+  }
+#endif
+}
+
+}  // namespace org::apache::nifi::minifi::internal
diff --git a/extensions/rocksdb-repos/database/RocksDbUtils.h b/extensions/rocksdb-repos/database/RocksDbUtils.h
index d42afce6a..b5748c117 100644
--- a/extensions/rocksdb-repos/database/RocksDbUtils.h
+++ b/extensions/rocksdb-repos/database/RocksDbUtils.h
@@ -15,19 +15,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
 #include <functional>
 #include <algorithm>
+#include <optional>
+#include <memory>
+#include <string>
+
 #include "rocksdb/db.h"
+#include "rocksdb/options.h"
 #include "utils/GeneralUtils.h"
+#include "properties/Configure.h"
 
-namespace org {
-namespace apache {
-namespace nifi {
-namespace minifi {
-namespace internal {
+namespace org::apache::nifi::minifi::internal {
 
 enum class RocksDbMode {
   ReadOnly,
@@ -69,8 +70,6 @@ class Writable {
 using DBOptionsPatch = std::function<void(Writable<rocksdb::DBOptions>&)>;
 using ColumnFamilyOptionsPatch = std::function<void(rocksdb::ColumnFamilyOptions&)>;
 
-}  // namespace internal
-}  // namespace minifi
-}  // namespace nifi
-}  // namespace apache
-}  // namespace org
+std::optional<rocksdb::CompressionType> readConfiguredCompressionType(const std::shared_ptr<Configure> &configuration, const std::string& config_key);
+
+}  // namespace org::apache::nifi::minifi::internal
diff --git a/extensions/rocksdb-repos/database/StringAppender.cpp b/extensions/rocksdb-repos/database/StringAppender.cpp
index 4580dcdbc..7f46e5943 100644
--- a/extensions/rocksdb-repos/database/StringAppender.cpp
+++ b/extensions/rocksdb-repos/database/StringAppender.cpp
@@ -39,7 +39,7 @@ bool StringAppender::Merge(const rocksdb::Slice& /*key*/, const rocksdb::Slice*
   return true;
 }
 
-static auto string_appender_registrar = rocksdb::ObjectLibrary::Default()->Register<StringAppender>(
+static auto string_appender_registrar = rocksdb::ObjectLibrary::Default()->AddFactory<StringAppender>(
     "StringAppender",
     [] (const std::string& /* uri */, std::unique_ptr<StringAppender>* out, std::string* /* errmsg */) {
       *out = std::make_unique<StringAppender>();
diff --git a/extensions/standard-processors/tests/unit/PutTCPTests.cpp b/extensions/standard-processors/tests/unit/PutTCPTests.cpp
index ac44c4db8..524c451b2 100644
--- a/extensions/standard-processors/tests/unit/PutTCPTests.cpp
+++ b/extensions/standard-processors/tests/unit/PutTCPTests.cpp
@@ -202,8 +202,8 @@ class PutTCPTestFixture {
     put_tcp_->setProperty(PutTCP::Port, utils::StringUtils::join_pack("${literal('", std::to_string(port), "')}"));
   }
 
-  void setPutTCPPort(std::string port_str) {
-    put_tcp_->setProperty(PutTCP::Port, std::move(port_str));
+  void setPutTCPPort(const std::string& port_str) {
+    put_tcp_->setProperty(PutTCP::Port, port_str);
   }
 
   [[nodiscard]] uint16_t getSinglePort() const {
diff --git a/libminifi/include/properties/Configuration.h b/libminifi/include/properties/Configuration.h
index 25e6b5ebc..978600bb3 100644
--- a/libminifi/include/properties/Configuration.h
+++ b/libminifi/include/properties/Configuration.h
@@ -51,7 +51,9 @@ class Configuration : public Properties {
   static constexpr const char *nifi_server_name = "nifi.server.name";
   static constexpr const char *nifi_configuration_class_name = "nifi.flow.configuration.class.name";
   static constexpr const char *nifi_flow_repository_class_name = "nifi.flowfile.repository.class.name";
+  static constexpr const char *nifi_flow_repository_rocksdb_compression = "nifi.flowfile.repository.rocksdb.compression";
   static constexpr const char *nifi_content_repository_class_name = "nifi.content.repository.class.name";
+  static constexpr const char *nifi_content_repository_rocksdb_compression = "nifi.content.repository.rocksdb.compression";
   static constexpr const char *nifi_provenance_repository_class_name = "nifi.provenance.repository.class.name";
   static constexpr const char *nifi_volatile_repository_options_flowfile_max_count = "nifi.volatile.repository.options.flowfile.max.count";
   static constexpr const char *nifi_volatile_repository_options_flowfile_max_bytes = "nifi.volatile.repository.options.flowfile.max.bytes";
diff --git a/libminifi/src/Configuration.cpp b/libminifi/src/Configuration.cpp
index 78ae790f7..c584cb574 100644
--- a/libminifi/src/Configuration.cpp
+++ b/libminifi/src/Configuration.cpp
@@ -36,7 +36,9 @@ const std::vector<core::ConfigurationProperty> Configuration::CONFIGURATION_PROP
   core::ConfigurationProperty{Configuration::nifi_server_name},
   core::ConfigurationProperty{Configuration::nifi_configuration_class_name},
   core::ConfigurationProperty{Configuration::nifi_flow_repository_class_name},
+  core::ConfigurationProperty{Configuration::nifi_flow_repository_rocksdb_compression},
   core::ConfigurationProperty{Configuration::nifi_content_repository_class_name},
+  core::ConfigurationProperty{Configuration::nifi_content_repository_rocksdb_compression},
   core::ConfigurationProperty{Configuration::nifi_provenance_repository_class_name},
   core::ConfigurationProperty{Configuration::nifi_volatile_repository_options_flowfile_max_count, gsl::make_not_null(core::StandardValidators::get().UNSIGNED_INT_VALIDATOR.get())},
   core::ConfigurationProperty{Configuration::nifi_volatile_repository_options_flowfile_max_bytes, gsl::make_not_null(core::StandardValidators::get().DATA_SIZE_VALIDATOR.get())},
diff --git a/thirdparty/zstd/exclude_gcc_clang_compiler_options_from_windows.patch b/thirdparty/zstd/exclude_gcc_clang_compiler_options_from_windows.patch
new file mode 100644
index 000000000..c32f78f4f
--- /dev/null
+++ b/thirdparty/zstd/exclude_gcc_clang_compiler_options_from_windows.patch
@@ -0,0 +1,37 @@
+diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt
+index 8bba6ea6..ce84ed8c 100644
+--- a/build/cmake/tests/CMakeLists.txt
++++ b/build/cmake/tests/CMakeLists.txt
+@@ -57,7 +57,9 @@ target_link_libraries(datagen libzstd_static)
+ # fullbench
+ #
+ add_executable(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c)
+-set_property(TARGET fullbench APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
++if (NOT WIN32)
++    set_property(TARGET fullbench APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
++endif()
+ target_link_libraries(fullbench libzstd_static)
+ add_test(NAME fullbench COMMAND fullbench ${ZSTD_FULLBENCH_FLAGS})
+ 
+@@ -65,7 +67,9 @@ add_test(NAME fullbench COMMAND fullbench ${ZSTD_FULLBENCH_FLAGS})
+ # fuzzer
+ #
+ add_executable(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/fuzzer.c)
+-set_property(TARGET fuzzer APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
++if (NOT WIN32)
++    set_property(TARGET fuzzer APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
++endif()
+ target_link_libraries(fuzzer libzstd_static)
+ AddTestFlagsOption(ZSTD_FUZZER_FLAGS "$ENV{FUZZERTEST} $ENV{FUZZER_FLAGS}"
+     "Semicolon-separated list of flags to pass to the fuzzer test (see `fuzzer -h` for usage)")
+@@ -78,7 +82,9 @@ add_test(NAME fuzzer COMMAND fuzzer ${ZSTD_FUZZER_FLAGS})
+ # zstreamtest
+ #
+ add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c)
+-set_property(TARGET zstreamtest APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
++if (NOT WIN32)
++    set_property(TARGET zstreamtest APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
++endif()
+ target_link_libraries(zstreamtest libzstd_static)
+ AddTestFlagsOption(ZSTD_ZSTREAM_FLAGS "$ENV{ZSTREAM_TESTTIME} $ENV{FUZZER_FLAGS}"
+     "Semicolon-separated list of flags to pass to the zstreamtest test (see `zstreamtest -h` for usage)")