Posted to commits@hawq.apache.org by hu...@apache.org on 2019/03/24 09:16:32 UTC

[hawq] branch huor_liborc updated: HAWQ-1692. Add the ORC format implementation

This is an automated email from the ASF dual-hosted git repository.

huor pushed a commit to branch huor_liborc
in repository https://gitbox.apache.org/repos/asf/hawq.git


The following commit(s) were added to refs/heads/huor_liborc by this push:
     new ff3c381  HAWQ-1692. Add the ORC format implementation
ff3c381 is described below

commit ff3c381a7bacf4d033fc8322c077482f778aa73b
Author: Ruilong Huo <hu...@apache.org>
AuthorDate: Sun Mar 24 17:15:47 2019 +0800

    HAWQ-1692. Add the ORC format implementation
---
 depends/storage/.gitignore                         |    8 +
 .../storage/CMake/CMakeTestCompileInt64tType.cc    |   30 +
 depends/storage/CMake/FindCogapp.cmake             |   50 +
 depends/storage/CMake/FindGFlags.cmake             |   48 +
 depends/storage/CMake/FindGlog.cmake               |   49 +
 depends/storage/CMake/FindJSON.cmake               |   38 +
 depends/storage/CMake/FindSnappy.cmake             |   30 +
 depends/storage/CMake/FindZLIB.cmake               |   48 +
 depends/storage/CMake/Functions.cmake              |   46 +
 depends/storage/CMake/Options.cmake                |   71 +
 depends/storage/CMake/Platform.cmake               |   47 +
 depends/storage/CMakeLists.txt                     |   28 +
 depends/storage/README                             |   15 +
 depends/storage/bootstrap                          |  109 +
 depends/storage/src/CMakeLists.txt                 |   84 +
 depends/storage/src/storage/README                 |    0
 depends/storage/src/storage/common/bloom-filter.h  |  189 ++
 depends/storage/src/storage/common/string.h        |   95 +
 .../src/storage/cwrapper/hdfs-file-system-c.cc     |  486 ++++
 .../src/storage/cwrapper/hdfs-file-system-c.h      |  126 +
 .../storage/src/storage/cwrapper/orc-format-c.cc   |  638 +++++
 .../storage/src/storage/cwrapper/orc-format-c.h    |   81 +
 depends/storage/src/storage/format/format.cc       |   94 +
 depends/storage/src/storage/format/format.h        |  223 ++
 depends/storage/src/storage/format/orc/README      |  320 +++
 depends/storage/src/storage/format/orc/byte-rle.cc |  476 ++++
 depends/storage/src/storage/format/orc/byte-rle.h  |  237 ++
 .../src/storage/format/orc/column-printer.cc       |  613 +++++
 .../src/storage/format/orc/column-printer.h        |  280 ++
 .../storage/src/storage/format/orc/data-buffer.cc  |   67 +
 .../storage/src/storage/format/orc/data-buffer.h   |   62 +
 .../storage/src/storage/format/orc/exceptions.cc   |   58 +
 .../storage/src/storage/format/orc/exceptions.h    |   51 +
 .../storage/src/storage/format/orc/file-version.h  |   56 +
 .../storage/src/storage/format/orc/input-stream.cc |   40 +
 .../storage/src/storage/format/orc/input-stream.h  |  112 +
 depends/storage/src/storage/format/orc/int128.cc   |  480 ++++
 depends/storage/src/storage/format/orc/int128.h    |  304 +++
 .../src/storage/format/orc/lzo-decompressor.cc     |  396 +++
 .../src/storage/format/orc/lzo-decompressor.h      |   35 +
 .../src/storage/format/orc/orc-format-reader.cc    |  278 ++
 .../src/storage/format/orc/orc-format-reader.h     |   77 +
 .../src/storage/format/orc/orc-format-writer.cc    |  208 ++
 .../src/storage/format/orc/orc-format-writer.h     |   64 +
 .../storage/src/storage/format/orc/orc-format.cc   |  129 +
 .../storage/src/storage/format/orc/orc-format.h    |  113 +
 .../src/storage/format/orc/orc-predicates.cc       |  281 ++
 .../src/storage/format/orc/orc-predicates.h        |   71 +
 .../src/storage/format/orc/orc-proto-definition.cc |  221 ++
 .../src/storage/format/orc/orc-proto-definition.h  | 1131 ++++++++
 .../storage/src/storage/format/orc/orc_proto.proto |  277 ++
 .../src/storage/format/orc/output-stream.cc        |   33 +
 .../storage/src/storage/format/orc/output-stream.h |  135 +
 depends/storage/src/storage/format/orc/reader.cc   | 2424 +++++++++++++++++
 depends/storage/src/storage/format/orc/reader.h    | 1071 ++++++++
 depends/storage/src/storage/format/orc/rle-v0.h    |  137 +
 depends/storage/src/storage/format/orc/rle-v1.h    |  371 +++
 depends/storage/src/storage/format/orc/rle-v2.h    | 1768 ++++++++++++
 depends/storage/src/storage/format/orc/rle.cc      |  139 +
 depends/storage/src/storage/format/orc/rle.h       |  596 ++++
 .../storage/format/orc/seekable-input-stream.cc    |  624 +++++
 .../src/storage/format/orc/seekable-input-stream.h |  378 +++
 .../storage/format/orc/seekable-output-stream.cc   |   46 +
 .../storage/format/orc/seekable-output-stream.h    |  261 ++
 .../src/storage/format/orc/string-dictionary.cc    |   60 +
 .../src/storage/format/orc/string-dictionary.h     |   62 +
 depends/storage/src/storage/format/orc/timezone.cc |  458 ++++
 depends/storage/src/storage/format/orc/timezone.h  |  502 ++++
 .../storage/src/storage/format/orc/type-impl.cc    |  507 ++++
 depends/storage/src/storage/format/orc/type-impl.h |  109 +
 depends/storage/src/storage/format/orc/type.h      |  105 +
 depends/storage/src/storage/format/orc/vector.cc   |  453 ++++
 depends/storage/src/storage/format/orc/vector.h    |  704 +++++
 depends/storage/src/storage/format/orc/writer.cc   |  288 ++
 depends/storage/src/storage/format/orc/writer.h    | 1516 +++++++++++
 .../format/orc/writer/binary-column-writer.cc      |  101 +
 .../format/orc/writer/decimal-column-writer.cc     |  295 ++
 .../format/orc/writer/string-column-writer.cc      |  232 ++
 depends/storage/src/storage/testutil/file-utils.h  |   54 +
 depends/storage/src/storage/testutil/format-util.h |  288 ++
 depends/storage/test/CMakeLists.txt                |   32 +
 depends/storage/test/data/hawq-write-orc.sql       |    3 +
 depends/storage/test/data/sampledata               |    0
 depends/storage/test/data/spark-read-orc.sql       |    3 +
 depends/storage/test/parallel/parallel-launcher.py |  153 ++
 depends/storage/test/unit/CMakeLists.txt           |   23 +
 .../storage/test/unit/common/test-bloom-filter.cc  |  118 +
 .../test/unit/format/test-filter-pushdown.cc       |  613 +++++
 .../test/unit/format/test-orc-byte-rle-encoder.cc  |  165 ++
 .../storage/test/unit/format/test-orc-byte-rle.cc  | 1445 ++++++++++
 .../storage/test/unit/format/test-orc-format.cc    |  529 ++++
 .../storage/test/unit/format/test-orc-int128.cc    |  620 +++++
 .../test/unit/format/test-orc-proto-definition.cc  |  351 +++
 depends/storage/test/unit/format/test-orc-rle.cc   | 2863 ++++++++++++++++++++
 .../storage/test/unit/format/test-orc-vector.cc    |  194 ++
 .../test/unit/format/test-string-dictionary.cc     |   74 +
 depends/storage/test/unit/unit-test-main.cc        |   32 +
 97 files changed, 30005 insertions(+)

diff --git a/depends/storage/.gitignore b/depends/storage/.gitignore
new file mode 100644
index 0000000..a9b913a
--- /dev/null
+++ b/depends/storage/.gitignore
@@ -0,0 +1,8 @@
+.DS_Store
+.cproject
+.project
+.settings
+.pydevproject
+*.pyc
+build/
+CodeCoverageReport/
diff --git a/depends/storage/CMake/CMakeTestCompileInt64tType.cc b/depends/storage/CMake/CMakeTestCompileInt64tType.cc
new file mode 100644
index 0000000..ad2fc8e
--- /dev/null
+++ b/depends/storage/CMake/CMakeTestCompileInt64tType.cc
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <cstdint>
+
+// test whether int64_t is a typedef for long long
+
+void test(long long *i) {}  // NOLINT
+
+int main() {
+  int64_t i = 0;
+  test(&i);
+  return 0;
+}
diff --git a/depends/storage/CMake/FindCogapp.cmake b/depends/storage/CMake/FindCogapp.cmake
new file mode 100644
index 0000000..198c23b
--- /dev/null
+++ b/depends/storage/CMake/FindCogapp.cmake
@@ -0,0 +1,50 @@
+# locate cogapp and generate source code from template
+# 
+# find_package(Cogapp REQUIRED)
+#
+# COGAPP_GENERATE (public function)
+#   GENERATED_CODE = Variable to define with generated source files.
+#   TEMPLATE  = Template used to generate source files.
+#
+# NOTE: The COGAPP_GENERATE macro & add_executable() or add_library()
+#       calls only work properly within the same directory.
+#
+
+find_package(PythonInterp REQUIRED)
+
+function(COGAPP_GENERATE GENERATED_CODE)
+  if(NOT ARGN)
+    message(SEND_ERROR "Error: COGAPP_GENERATE() called without any template files")
+    return()
+  endif()
+
+  set(${GENERATED_CODE})
+  foreach(FIL ${ARGN})
+    get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
+    file(RELATIVE_PATH FIL_REL ${CMAKE_SOURCE_DIR} ${ABS_FIL})
+    
+    get_filename_component(FIL_DIR ${CMAKE_BINARY_DIR}/codegen/${FIL_REL} DIRECTORY)
+    file(MAKE_DIRECTORY ${FIL_DIR})
+    
+    get_filename_component(FIL_WE "${CMAKE_BINARY_DIR}/codegen/${FIL_REL}" NAME_WE)
+    get_filename_component(FIL_EXT "${CMAKE_BINARY_DIR}/codegen/${FIL_REL}" EXT)
+    
+    set(FIL_OUT "${FIL_DIR}/${FIL_WE}.cg${FIL_EXT}")
+    list(APPEND ${GENERATED_CODE} ${FIL_OUT})
+    
+    if(NOT EXISTS ${ABS_FIL})
+        MESSAGE(FATAL_ERROR "file ${ABS_FIL} does not exist")
+    endif()
+
+    add_custom_command(
+      OUTPUT ${FIL_OUT}
+      COMMAND  ${PYTHON_EXECUTABLE}
+      ARGS -m cogapp -d -o ${FIL_OUT} ${ABS_FIL}
+      DEPENDS ${ABS_FIL}
+      COMMENT "Running cog on ${FIL}"
+      VERBATIM )
+  endforeach()
+
+  set_source_files_properties(${${GENERATED_CODE}} PROPERTIES GENERATED TRUE)
+  set(${GENERATED_CODE} ${${GENERATED_CODE}} PARENT_SCOPE)
+endfunction()
\ No newline at end of file
diff --git a/depends/storage/CMake/FindGFlags.cmake b/depends/storage/CMake/FindGFlags.cmake
new file mode 100644
index 0000000..f93c571
--- /dev/null
+++ b/depends/storage/CMake/FindGFlags.cmake
@@ -0,0 +1,48 @@
+# - Try to find GFLAGS
+#
+# The following variables are optionally searched for defaults
+#  GFLAGS_ROOT_DIR:            Base directory where all GFLAGS components are found
+#
+# The following are set after configuration is done:
+#  GFLAGS_FOUND
+#  GFLAGS_INCLUDE_DIRS
+#  GFLAGS_LIBRARIES
+#  GFLAGS_LIBRARY_DIRS
+
+include(FindPackageHandleStandardArgs)
+
+set(GFLAGS_ROOT_DIR "" CACHE PATH "Folder contains Gflags")
+
+# We are testing only a couple of files in the include directories
+if(WIN32)
+    find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h
+        PATHS ${GFLAGS_ROOT_DIR}/src/windows)
+else()
+    find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h
+        PATHS ${GFLAGS_ROOT_DIR})
+endif()
+
+if(MSVC)
+    find_library(GFLAGS_LIBRARY_RELEASE
+        NAMES libgflags
+        PATHS ${GFLAGS_ROOT_DIR}
+        PATH_SUFFIXES Release)
+
+    find_library(GFLAGS_LIBRARY_DEBUG
+        NAMES libgflags-debug
+        PATHS ${GFLAGS_ROOT_DIR}
+        PATH_SUFFIXES Debug)
+
+    set(GFLAGS_LIBRARY optimized ${GFLAGS_LIBRARY_RELEASE} debug ${GFLAGS_LIBRARY_DEBUG})
+else()
+    find_library(GFLAGS_LIBRARY gflags)
+endif()
+
+find_package_handle_standard_args(GFLAGS DEFAULT_MSG
+    GFLAGS_INCLUDE_DIR GFLAGS_LIBRARY)
+
+
+if(GFLAGS_FOUND)
+    set(GFLAGS_INCLUDE_DIRS ${GFLAGS_INCLUDE_DIR})
+    set(GFLAGS_LIBRARIES ${GFLAGS_LIBRARY})
+endif()
diff --git a/depends/storage/CMake/FindGlog.cmake b/depends/storage/CMake/FindGlog.cmake
new file mode 100644
index 0000000..d9f0ee0
--- /dev/null
+++ b/depends/storage/CMake/FindGlog.cmake
@@ -0,0 +1,49 @@
+
+# - Try to find Glog
+#
+# The following variables are optionally searched for defaults
+#  GLOG_ROOT_DIR:            Base directory where all GLOG components are found
+#
+# The following are set after configuration is done: 
+#  GLOG_FOUND
+#  GLOG_INCLUDE_DIRS
+#  GLOG_LIBRARIES
+#  GLOG_LIBRARY_DIRS
+
+include(FindPackageHandleStandardArgs)
+
+set(GLOG_ROOT_DIR "" CACHE PATH "Folder contains Google glog")
+
+if(WIN32)
+    find_path(GLOG_INCLUDE_DIR glog/logging.h
+        PATHS ${GLOG_ROOT_DIR}/src/windows)
+else()
+    find_path(GLOG_INCLUDE_DIR glog/logging.h
+        PATHS ${GLOG_ROOT_DIR})
+endif()
+
+if(MSVC)
+    find_library(GLOG_LIBRARY_RELEASE libglog_static
+        PATHS ${GLOG_ROOT_DIR}
+        PATH_SUFFIXES Release)
+
+    find_library(GLOG_LIBRARY_DEBUG libglog_static
+        PATHS ${GLOG_ROOT_DIR}
+        PATH_SUFFIXES Debug)
+
+    set(GLOG_LIBRARY optimized ${GLOG_LIBRARY_RELEASE} debug ${GLOG_LIBRARY_DEBUG})
+else()
+    find_library(GLOG_LIBRARY glog
+        PATHS ${GLOG_ROOT_DIR}
+        PATH_SUFFIXES
+            lib
+            lib64)
+endif()
+
+find_package_handle_standard_args(GLOG DEFAULT_MSG
+    GLOG_INCLUDE_DIR GLOG_LIBRARY)
+
+if(GLOG_FOUND)
+    set(GLOG_INCLUDE_DIRS ${GLOG_INCLUDE_DIR})
+    set(GLOG_LIBRARIES ${GLOG_LIBRARY})
+endif()
\ No newline at end of file
diff --git a/depends/storage/CMake/FindJSON.cmake b/depends/storage/CMake/FindJSON.cmake
new file mode 100644
index 0000000..a334948
--- /dev/null
+++ b/depends/storage/CMake/FindJSON.cmake
@@ -0,0 +1,38 @@
+# - Find json
+# Find the native JSON headers and libraries.
+#
+#  JSON_INCLUDE_DIRS   - where to find json/json.h, etc.
+#  JSON_LIBRARIES      - List of libraries when using json.
+#  JSON_FOUND          - True if json found.
+
+#=============================================================================
+# Copyright 2006-2009 Kitware, Inc.
+# Copyright 2012 Rolf Eike Beer <ei...@sf-mail.de>
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#=============================================================================
+# (To distribute this file outside of CMake, substitute the full
+#  License text for the above reference.)
+
+# Look for the header file.
+find_path(JSON_INCLUDE_DIR NAMES json/json.h)
+mark_as_advanced(JSON_INCLUDE_DIR)
+
+# Look for the library (sorted from most current/relevant entry to least).
+find_library(JSON_LIBRARY NAMES jsoncpp
+)
+mark_as_advanced(JSON_LIBRARY)
+
+# handle the QUIETLY and REQUIRED arguments and set JSON_FOUND to TRUE if
+# all listed variables are TRUE
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(JSON DEFAULT_MSG JSON_INCLUDE_DIR JSON_LIBRARY)
+
+if(JSON_FOUND)
+  set(JSON_LIBRARIES ${JSON_LIBRARY})
+  set(JSON_INCLUDE_DIRS ${JSON_INCLUDE_DIR})
+endif()
diff --git a/depends/storage/CMake/FindSnappy.cmake b/depends/storage/CMake/FindSnappy.cmake
new file mode 100644
index 0000000..623d2d7
--- /dev/null
+++ b/depends/storage/CMake/FindSnappy.cmake
@@ -0,0 +1,30 @@
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  SNAPPY_ROOT_DIR  Set this variable to the root installation of
+#                    Snappy if the module has problems finding
+#                    the proper installation path.
+#
+# Variables defined by this module:
+#
+#  SNAPPY_FOUND              System has Snappy libs/headers
+#  SNAPPY_LIBRARIES          The Snappy libraries
+#  SNAPPY_INCLUDE_DIR        The location of Snappy headers
+
+find_path(SNAPPY_INCLUDE_DIR
+    NAMES snappy.h
+    HINTS ${SNAPPY_ROOT_DIR}/include)
+
+find_library(SNAPPY_LIBRARIES
+    NAMES snappy
+    HINTS ${SNAPPY_ROOT_DIR}/lib)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Snappy DEFAULT_MSG
+    SNAPPY_LIBRARIES
+    SNAPPY_INCLUDE_DIR)
+
+mark_as_advanced(
+    SNAPPY_ROOT_DIR
+    SNAPPY_LIBRARIES
+    SNAPPY_INCLUDE_DIR)
diff --git a/depends/storage/CMake/FindZLIB.cmake b/depends/storage/CMake/FindZLIB.cmake
new file mode 100644
index 0000000..ec27957
--- /dev/null
+++ b/depends/storage/CMake/FindZLIB.cmake
@@ -0,0 +1,48 @@
+# - Find zlib
+# Find the native ZLIB headers and libraries.
+#
+#  ZLIB_INCLUDE_DIRS   - where to find zlib.h, etc.
+#  ZLIB_LIBRARIES      - List of libraries when using zlib.
+#  ZLIB_FOUND          - True if zlib found.
+
+#=============================================================================
+# (C) 1995-2017 Jean-loup Gailly and Mark Adler
+#
+#  This software is provided 'as-is', without any express or implied
+#  warranty.  In no event will the authors be held liable for any damages
+#  arising from the use of this software.
+#
+#  Permission is granted to anyone to use this software for any purpose,
+#  including commercial applications, and to alter it and redistribute it
+#  freely, subject to the following restrictions:
+#
+#  1. The origin of this software must not be misrepresented; you must not
+#     claim that you wrote the original software. If you use this software
+#     in a product, an acknowledgment in the product documentation would be
+#     appreciated but is not required.
+#  2. Altered source versions must be plainly marked as such, and must not be
+#     misrepresented as being the original software.
+#  3. This notice may not be removed or altered from any source distribution.
+#
+#  Jean-loup Gailly        Mark Adler
+#  jloup@gzip.org          madler@alumni.caltech.edu
+#=============================================================================
+# (To distribute this file outside of CMake, substitute the full
+#  License text for the above reference.)
+
+# Look for the header file.
+find_path(ZLIB_INCLUDE_DIR NAMES zlib.h)
+mark_as_advanced(ZLIB_INCLUDE_DIR)
+
+# Look for the library (sorted from most current/relevant entry to least).
+find_library(ZLIB_LIBRARY NAMES z)
+mark_as_advanced(ZLIB_LIBRARY)
+
+# handle the QUIETLY and REQUIRED arguments and set ZLIB_FOUND to TRUE if
+# all listed variables are TRUE
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(ZLIB DEFAULT_MSG ZLIB_INCLUDE_DIR ZLIB_LIBRARY)
+
+if(ZLIB_FOUND)
+  set(ZLIB_LIBRARIES ${ZLIB_LIBRARY})
+  set(ZLIB_INCLUDE_DIRS ${ZLIB_INCLUDE_DIR})
+endif()
diff --git a/depends/storage/CMake/Functions.cmake b/depends/storage/CMake/Functions.cmake
new file mode 100644
index 0000000..a771b60
--- /dev/null
+++ b/depends/storage/CMake/Functions.cmake
@@ -0,0 +1,46 @@
+FUNCTION(AUTO_SOURCES RETURN_VALUE PATTERN SOURCE_SUBDIRS)
+
+	IF ("${SOURCE_SUBDIRS}" STREQUAL "RECURSE")
+		SET(PATH ".")
+		IF (${ARGC} EQUAL 4)
+			LIST(GET ARGV 3 PATH)
+		ENDIF ()
+	ENDIF()
+
+	IF ("${SOURCE_SUBDIRS}" STREQUAL "RECURSE")
+		UNSET(${RETURN_VALUE})
+		FILE(GLOB SUBDIR_FILES "${PATH}/${PATTERN}")
+		LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})
+
+		FILE(GLOB SUBDIRS RELATIVE ${PATH} ${PATH}/*)
+
+		FOREACH(DIR ${SUBDIRS})
+			IF (IS_DIRECTORY ${PATH}/${DIR})
+				IF (NOT "${DIR}" STREQUAL "CMAKEFILES")
+					FILE(GLOB_RECURSE SUBDIR_FILES "${PATH}/${DIR}/${PATTERN}")
+					LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})
+				ENDIF()
+			ENDIF()
+		ENDFOREACH()
+	ELSE ()
+		FILE(GLOB ${RETURN_VALUE} "${PATTERN}")
+
+		FOREACH (PATH ${SOURCE_SUBDIRS})
+			FILE(GLOB SUBDIR_FILES "${PATH}/${PATTERN}")
+			LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})
+		ENDFOREACH(PATH ${SOURCE_SUBDIRS})
+	ENDIF ()
+
+	IF (${FILTER_OUT})
+		LIST(REMOVE_ITEM ${RETURN_VALUE} ${FILTER_OUT})
+	ENDIF()
+
+	SET(${RETURN_VALUE} ${${RETURN_VALUE}} PARENT_SCOPE)
+ENDFUNCTION(AUTO_SOURCES)
+
+FUNCTION(CONTAINS_STRING FILE SEARCH RETURN_VALUE)
+	FILE(STRINGS ${FILE} FILE_CONTENTS REGEX ".*${SEARCH}.*")
+	IF (FILE_CONTENTS)
+		SET(${RETURN_VALUE} TRUE PARENT_SCOPE)
+	ENDIF()
+ENDFUNCTION(CONTAINS_STRING)
diff --git a/depends/storage/CMake/Options.cmake b/depends/storage/CMake/Options.cmake
new file mode 100644
index 0000000..36841ea
--- /dev/null
+++ b/depends/storage/CMake/Options.cmake
@@ -0,0 +1,71 @@
+##############################################################################
+# In this file we handle all environment and custom settings
+##############################################################################
+
+##############################################################################
+# Set up build and dependency information
+##############################################################################
+SET(DEPENDENCY_INSTALL_PREFIX "/opt/dependency")
+IF($ENV{DEPENDENCY_INSTALL_PREFIX})
+	SET(DEPENDENCY_INSTALL_PREFIX $ENV{DEPENDENCY_INSTALL_PREFIX})
+ENDIF()
+
+SET(DEPENDENCY_DIST_PACKAGE_NAME "dependency-dist-package.tar.gz")
+IF($ENV{DEPENDENCY_DIST_PACKAGE_NAME})
+	SET(DEPENDENCY_DIST_PACKAGE_NAME $ENV{DEPENDENCY_DIST_PACKAGE_NAME})
+ENDIF()
+
+SET(CMAKE_PREFIX_PATH "${DEPENDENCY_INSTALL_PREFIX}" ${CMAKE_PREFIX_PATH})
+SET(CMAKE_PREFIX_PATH "${DEPENDENCY_INSTALL_PREFIX}/package" ${CMAKE_PREFIX_PATH})
+SET(CMAKE_PREFIX_PATH "${DEPENDENCY_INSTALL_PREFIX}/tools" ${CMAKE_PREFIX_PATH})
+
+SET(DEPENDENCY_LIBRARY_PATH "${DEPENDENCY_INSTALL_PREFIX}/package/lib:${DEPENDENCY_LIBRARY_PATH}")
+SET(DEPENDENCY_LIBRARY_PATH "${DEPENDENCY_INSTALL_PREFIX}/package/lib64:${DEPENDENCY_LIBRARY_PATH}")
+
+##############################################################################
+# Set up build flags
+##############################################################################
+OPTION(ENABLE_COVERAGE "enable code coverage." OFF)
+
+IF(NOT CMAKE_BUILD_TYPE)
+    SET(CMAKE_BUILD_TYPE Debug CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
+ENDIF(NOT CMAKE_BUILD_TYPE)
+
+IF(ENABLE_COVERAGE STREQUAL ON)
+    INCLUDE(CodeCoverage)
+ENDIF(ENABLE_COVERAGE STREQUAL ON)
+
+IF(CMAKE_BUILD_TYPE MATCHES Debug)
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0")    
+ENDIF()
+
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -fno-omit-frame-pointer -fno-strict-aliasing")
+
+IF(ENABLE_AVX STREQUAL ON)
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx -mno-avx2")
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DAVX_OPT")
+ELSE()
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-avx -mno-avx2")
+ENDIF()
+
+# C++11 is needed to provide a thread-safe singleton implementation.
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wno-deprecated-register")
+#-Rpass-missed=loop-vectorize  -Wall -Wconversion
+
+IF(CMAKE_COMPILER_IS_CLANG)
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-limit-debug-info -stdlib=libc++ -DUSE_CLANG")
+    IF(OS_LINUX)
+        SET(CLANG_LDFLAGS "-lc++abi -lc++" ${CLANG_LDFLAGS})
+    ENDIF(OS_LINUX)
+ENDIF(CMAKE_COMPILER_IS_CLANG)
+
+TRY_COMPILE(INT64T_EQUAL_LONGLONG
+    ${CMAKE_BINARY_DIR}
+    ${CMAKE_CURRENT_SOURCE_DIR}/CMake/CMakeTestCompileInt64tType.cc
+    OUTPUT_VARIABLE OUTPUT)
+
+IF(INT64T_EQUAL_LONGLONG)
+    MESSAGE(STATUS "Checking whether int64_t is typedef to long long -- yes")
+ELSE(INT64T_EQUAL_LONGLONG)
+    MESSAGE(STATUS "Checking whether int64_t is typedef to long long -- no")
+ENDIF(INT64T_EQUAL_LONGLONG)
diff --git a/depends/storage/CMake/Platform.cmake b/depends/storage/CMake/Platform.cmake
new file mode 100644
index 0000000..1ee0238
--- /dev/null
+++ b/depends/storage/CMake/Platform.cmake
@@ -0,0 +1,47 @@
+IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    SET(OS_LINUX true CACHE INTERNAL "Linux operating system")
+ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+    SET(OS_MACOSX true CACHE INTERNAL "Mac Darwin operating system")
+ELSE(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    MESSAGE(FATAL_ERROR "Unsupported OS: \"${CMAKE_SYSTEM_NAME}\"")
+ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+
+IF(CMAKE_COMPILER_IS_GNUCXX)
+    EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version  OUTPUT_VARIABLE COMPILER_OUTPUT)
+    
+    STRING(REGEX MATCH "[0-9]\\.[0-9]\\.[0-9]" GCC_COMPILER_VERSION ${COMPILER_OUTPUT})
+    STRING(REGEX MATCHALL "[0-9]" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION})
+    
+    LIST(LENGTH GCC_COMPILER_VERSION GCC_COMPILER_VERSION_LEN)
+    IF (NOT 3 EQUAL ${GCC_COMPILER_VERSION_LEN})
+        MESSAGE(FATAL_ERROR "Cannot get gcc version from \"${COMPILER_OUTPUT}\"")
+    ENDIF(NOT 3 EQUAL ${GCC_COMPILER_VERSION_LEN})
+    
+    LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR)
+    LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR)
+    LIST(GET GCC_COMPILER_VERSION 2 GCC_COMPILER_VERSION_PATCH)
+    
+    SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version")
+    SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version")
+    SET(GCC_COMPILER_VERSION_PATCH ${GCC_COMPILER_VERSION_PATCH} CACHE INTERNAL "gcc patch version")
+    
+    MESSAGE(STATUS "checking compiler: GCC (${GCC_COMPILER_VERSION_MAJOR}.${GCC_COMPILER_VERSION_MINOR}.${GCC_COMPILER_VERSION_PATCH})")
+ELSE(CMAKE_COMPILER_IS_GNUCXX)
+    EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version  OUTPUT_VARIABLE COMPILER_OUTPUT)
+    IF(COMPILER_OUTPUT MATCHES "clang")
+        SET(CMAKE_COMPILER_IS_CLANG true CACHE INTERNAL "using clang as compiler")
+        MESSAGE(STATUS "checking compiler: CLANG")
+    ELSE(COMPILER_OUTPUT MATCHES "clang")
+        MESSAGE(FATAL_ERROR "Unsupported compiler: \"${CMAKE_CXX_COMPILER}\"")
+    ENDIF(COMPILER_OUTPUT MATCHES "clang")
+ENDIF(CMAKE_COMPILER_IS_GNUCXX)
+
+INCLUDE (TestBigEndian)
+TEST_BIG_ENDIAN(IS_BIG_ENDIAN)
+if(IS_BIG_ENDIAN)
+ message(STATUS "BIG_ENDIAN")
+ ADD_DEFINITIONS(-DIS_BIG_ENDIAN)
+else()
+ message(STATUS "LITTLE_ENDIAN")
+ ADD_DEFINITIONS(-DIS_LITTLE_ENDIAN)
+endif()
diff --git a/depends/storage/CMakeLists.txt b/depends/storage/CMakeLists.txt
new file mode 100644
index 0000000..d2a64c6
--- /dev/null
+++ b/depends/storage/CMakeLists.txt
@@ -0,0 +1,28 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
+PROJECT(storage)
+
+##############################################################################
+# General CMake initialization
+##############################################################################
+SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})
+SET(CMAKE_VERBOSE_MAKEFILE OFF CACHE STRING "Verbose build." FORCE)
+
+IF(${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
+    MESSAGE(FATAL_ERROR "cannot build the project in the source directory! Out-of-source build is enforced!")
+ENDIF()
+
+##############################################################################
+# Import env, customer settings and utilities
+##############################################################################
+INCLUDE(Functions)
+INCLUDE(Platform)
+INCLUDE(Options)
+
+ADD_SUBDIRECTORY(src)
+ADD_SUBDIRECTORY(test)
+
+ADD_CUSTOM_TARGET(coverage
+    COMMAND make resetcoverage
+    COMMAND make -j8 unittest
+    COMMAND make ucoverage
+    COMMENT "Run all unit tests and get coverage...")
diff --git a/depends/storage/README b/depends/storage/README
new file mode 100644
index 0000000..f67eafe
--- /dev/null
+++ b/depends/storage/README
@@ -0,0 +1,15 @@
+# How to build
+## Get the source code and bootstrap
+
+    cd storage
+    mkdir build
+    cd build
+    ../bootstrap --prefix=/opt/dependency/package    # default prefix
+
+## Build
+
+    make
+
+## Install
+
+    make install
diff --git a/depends/storage/bootstrap b/depends/storage/bootstrap
new file mode 100755
index 0000000..36fd66c
--- /dev/null
+++ b/depends/storage/bootstrap
@@ -0,0 +1,109 @@
+#!/bin/sh
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+die() {
+    echo "$@" 1>&2 ; exit 1
+}
+
+arg ()
+{
+    echo "$1" | sed "s/^${2-[^=]*=}//"
+}
+
+# Detect directory information.
+source_dir=`cd "\`dirname \"$0\"\`";pwd`
+binary_dir=`pwd`
+
+# Choose the default install prefix.
+default_prefix="/opt/dependency/package"
+
+# Display bootstrap usage
+usage() {
+echo '
+Usage: '"$0"' [<options>]
+Options: [defaults in brackets after descriptions]
+Configuration:
+    --help                          print this message
+    --prefix=PREFIX                 install files in tree rooted at PREFIX
+                                    ['"${default_prefix}"']
+    --enable-coverage               enable code coverage, must be used together with --enable-debug
+    --enable-debug                  enable debug build
+    --enable-avx                    enable avx for vector instruction optimization
+'
+    exit 10
+}
+
+# Parse arguments
+prefix_dir="${default_prefix}"
+build_type="Release"
+enable_coverage="OFF"
+enable_avx="ON"
+while test $# != 0; do
+    case "$1" in
+    --prefix=*) dir=`arg "$1"`
+                prefix_dir="$dir";;
+    --enable-coverage) enable_coverage="ON"
+                       build_type="Debug";;
+    --enable-debug) build_type="Debug";;
+    --enable-avx=*) avx=`arg "$1"`
+                    enable_avx="$avx";;
+    --help) usage ;;
+    *) die "Unknown option: $1" ;;
+    esac
+    shift
+done
+
+if [ ${source_dir} = ${binary_dir} ]; then
+	die "cannot build the project in the source directory! Out-of-source build is enforced!"
+fi
+
+enable_avx_upper=`echo "${enable_avx}" | tr [a-z] [A-Z]`
+if [ ${enable_avx_upper} != "ON" ] && [ ${enable_avx_upper} != "OFF" ]; then
+    die "unknown value for option enable-avx: ${enable_avx}, valid options are: on and off"
+fi
+
+# Use the default compilers when CC/CXX are not set
+if [ x"${CC}" = x"" ]; then
+    CC=gcc
+fi
+
+if [ x"${CXX}" = x"" ]; then
+    CXX=g++
+fi
+
+c_compiler=`which ${CC}`
+cxx_compiler=`which ${CXX}`
+cmake=`which cmake`
+
+if [ ! -x ${c_compiler} ]; then
+    die "cannot found c compiler"
+fi
+
+if [ ! -x ${cxx_compiler} ]; then
+    die "cannot found c++ compiler"
+fi
+
+if [ ! -x ${cmake} ]; then
+    die "cannot found cmake"
+fi
+
+# Configure 
+${cmake} -DCMAKE_BUILD_TYPE=${build_type} -DCMAKE_INSTALL_PREFIX=${prefix_dir} -DCMAKE_C_COMPILER=${c_compiler} -DCMAKE_CXX_COMPILER=${cxx_compiler} -DENABLE_COVERAGE=${enable_coverage} -DENABLE_AVX=${enable_avx_upper} ${source_dir} || die "failed to configure the project"
+
+echo 'bootstrap success. Run "make" to build.'
diff --git a/depends/storage/src/CMakeLists.txt b/depends/storage/src/CMakeLists.txt
new file mode 100644
index 0000000..dd7ed3b
--- /dev/null
+++ b/depends/storage/src/CMakeLists.txt
@@ -0,0 +1,84 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
+PROJECT(storage)
+
+FIND_PACKAGE(Protobuf REQUIRED)
+FIND_PACKAGE(JSON REQUIRED)
+FIND_PACKAGE(Snappy REQUIRED)
+FIND_PACKAGE(ZLIB REQUIRED)
+
+SET(storage_VERSION_MAJOR 0)
+SET(storage_VERSION_MINOR 1)
+SET(storage_VERSION_PATCH 0)
+SET(storage_VERSION_API 1)
+set(CMAKE_MACOSX_RPATH 1)
+
+SET(storage_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+SET(storage_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/storage)
+SET(orcformat_proto_DIR ${storage_SRC_DIR}/format/orc)
+
+file(GLOB proto_files "${storage_SRC_DIR}/format/orc/*.proto")
+set(proto_SRC_DIR ${CMAKE_BINARY_DIR}/src/storage/format/orc)
+set(storage_PROTO_HDRS
+       ${proto_SRC_DIR}/orc_proto.pb.h
+)
+set(storage_PROTO_SRCS
+       ${proto_SRC_DIR}/orc_proto.pb.cc
+)
+file(MAKE_DIRECTORY ${proto_SRC_DIR})
+add_custom_command(
+       OUTPUT ${storage_PROTO_SRCS} ${storage_PROTO_HDRS}
+       COMMAND ${Protobuf_PROTOC_EXECUTABLE}
+       ARGS --cpp_out ${CMAKE_BINARY_DIR}/src -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_files}
+       DEPENDS "${proto_files}"
+       )
+
+AUTO_SOURCES(storage_files "*.cc" "RECURSE" "${storage_SRC_DIR}")
+LIST(APPEND storage_SOURCES ${storage_files})
+
+AUTO_SOURCES(common_HEADER "*.h" "${storage_SRC_DIR}/common")
+AUTO_SOURCES(cwrapper_HEADER "*.h" "${storage_SRC_DIR}/cwrapper")
+AUTO_SOURCES(format_HEADER "*.h" "${storage_SRC_DIR}/format")
+AUTO_SOURCES(orc_format_HEADER "*.h" "${storage_SRC_DIR}/format/orc")
+AUTO_SOURCES(testutil_HEADER "*.h" "${storage_SRC_DIR}/testutil")
+
+INCLUDE_DIRECTORIES(${storage_ROOT_DIR})
+INCLUDE_DIRECTORIES(${DEPENDENCY_INSTALL_PREFIX}/package/include)
+INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/src)
+INCLUDE_DIRECTORIES(/usr/local/include)
+LINK_DIRECTORIES(/usr/local/lib)
+LINK_DIRECTORIES(${DEPENDENCY_INSTALL_PREFIX}/package/lib)
+
+
+ADD_LIBRARY(storage-shared SHARED ${storage_SOURCES} ${storage_PROTO_SRCS} ${storage_PROTO_HDRS})
+# ADD_LIBRARY(storage-static STATIC ${storage_SOURCES} ${storage_PROTO_SRCS} ${storage_PROTO_HDRS})
+
+SET_TARGET_PROPERTIES(storage-shared PROPERTIES OUTPUT_NAME "storage")
+# SET_TARGET_PROPERTIES(storage-static PROPERTIES OUTPUT_NAME "storage")
+
+target_link_libraries(storage-shared ${CLANG_LDFLAGS}
+                      dbcommon
+                      univplan
+                      hdfs3
+                      snappy
+                      lz4
+                      ${ZLIB_LIBRARIES}
+                      ${JSON_LIBRARIES}
+                      glog
+                      protobuf
+                      pthread
+                      iconv)
+# target_link_libraries(storage-static ${CLANG_LDFLAGS} dbcommon univplan hdfs3 snappy lz4 ${ZLIB_LIBRARIES} ${JSON_LIBRARIES} glog protobuf pthread iconv)
+
+INSTALL(TARGETS storage-shared
+     RUNTIME DESTINATION bin
+     LIBRARY DESTINATION lib
+     ARCHIVE DESTINATION lib)
+
+INSTALL(FILES ${common_HEADER} DESTINATION include/storage/common)
+INSTALL(FILES ${cwrapper_HEADER} DESTINATION include/storage/cwrapper)
+INSTALL(FILES ${format_HEADER} DESTINATION include/storage/format)
+INSTALL(FILES ${orc_format_HEADER} DESTINATION include/storage/format/orc)
+INSTALL(FILES ${storage_PROTO_HDRS} DESTINATION include/storage/format/orc)
+INSTALL(FILES ${testutil_HEADER} DESTINATION include/storage/testutil)
+
+SET(storage_ROOT_DIR ${storage_ROOT_DIR} PARENT_SCOPE)
diff --git a/depends/storage/src/storage/README b/depends/storage/src/storage/README
new file mode 100644
index 0000000..e69de29
diff --git a/depends/storage/src/storage/common/bloom-filter.h b/depends/storage/src/storage/common/bloom-filter.h
new file mode 100644
index 0000000..726cbea
--- /dev/null
+++ b/depends/storage/src/storage/common/bloom-filter.h
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_COMMON_BLOOM_FILTER_H_
+#define STORAGE_SRC_STORAGE_COMMON_BLOOM_FILTER_H_
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+
+#include "dbcommon/hash/fast-hash.h"
+
+namespace storage {
+
+class MyBitSet {
+ public:
+  explicit MyBitSet(int64_t bits) : ownData(true) {
+    length = static_cast<int64_t>(
+        ceil(static_cast<double>(bits) / static_cast<double>(64)));
+    data = new uint64_t[length];
+    clear();
+  }
+
+  MyBitSet(uint64_t *data, int64_t size) : data(data), length(size) {
+    assert(data != nullptr && length > 0);
+  }
+
+  ~MyBitSet() {
+    if (ownData) {
+      delete[] data;
+    }
+  }
+
+  typedef std::unique_ptr<MyBitSet> uptr;
+
+  void set(int64_t index) {
+    assert(index >= 0);
+    data[index >> 6] |= (1L << index);
+  }
+
+  bool get(int64_t index) {
+    assert(index >= 0);
+    return (data[index >> 6] & (1L << index)) != 0;
+  }
+
+  uint64_t *getData() { return data; }
+  int64_t size() { return length; }
+
+  void clear() { memset(data, 0, size() * 8); }
+
+ private:
+  uint64_t *data = nullptr;
+  int64_t length = 0;
+  bool ownData = false;
+};
+
+class BloomFilter {
+ public:
+  explicit BloomFilter(int64_t expectedEntry) : kExpectedEntry(expectedEntry) {
+    assert(kExpectedEntry > 0 && "expectedEntries should be > 0");
+    assert(kDefaultFpp > 0.0 && kDefaultFpp < 1.0 &&
+           "False positive probability should be > 0.0 & < 1.0");
+    int64_t nb = optimalNumOfBits(kExpectedEntry, kDefaultFpp);
+    numBits = nb + 64 - (nb % 64);
+    numHashFunctions = optimalNumOfHashFunctions(kExpectedEntry, numBits);
+    bitSet.reset(new MyBitSet(numBits));
+  }
+
+  BloomFilter(uint64_t *bits, int64_t size, uint32_t numFuncs) {
+    bitSet.reset(new MyBitSet(bits, size));
+    numBits = size * 64;
+    numHashFunctions = numFuncs;
+  }
+
+  virtual ~BloomFilter() {}
+
+  typedef std::unique_ptr<BloomFilter> uptr;
+
+  void addInt(int64_t val) { addHash(getIntegerHash(val)); }
+  bool testInt(int64_t val) { return testHash(getIntegerHash(val)); }
+
+  void addDouble(double val) { addInt(doubleToRawBits(val)); }
+  bool testDouble(double val) { return testInt(doubleToRawBits(val)); }
+
+  void addString(const char *buffer, uint64_t len) {
+    int64_t hash64 = static_cast<int64_t>(murmur3.hash64(buffer, len));
+    addHash(hash64);
+  }
+  bool testString(const char *buffer, uint64_t len) {
+    int64_t hash64 = static_cast<int64_t>(murmur3.hash64(buffer, len));
+    return testHash(hash64);
+  }
+
+  uint64_t *getBitSet() { return bitSet->getData(); }
+  int64_t size() { return bitSet->size(); }
+
+  void reset() { bitSet->clear(); }
+
+  uint32_t getNumHashFunctions() { return numHashFunctions; }
+
+ private:
+  int64_t optimalNumOfBits(int64_t n, double p) {
+    auto ln2 = std::log(2);
+    return static_cast<int64_t>(std::ceil(-(n * std::log(p) / ln2 / ln2)));
+  }
+
+  uint32_t optimalNumOfHashFunctions(int64_t n, int64_t m) {
+    auto frac = static_cast<double>(m) / static_cast<double>(n);
+    return static_cast<uint32_t>(std::ceil(frac * std::log(2)));
+  }
+
+  int64_t getIntegerHash(int64_t key) {
+    key = (~key) + (key << 21);  // key = (key << 21) - key - 1;
+    key = key ^ (key >> 24);
+    key = (key + (key << 3)) + (key << 8);  // key * 265
+    key = key ^ (key >> 14);
+    key = (key + (key << 2)) + (key << 4);  // key * 21
+    key = key ^ (key >> 28);
+    key = key + (key << 31);
+    return key;
+  }
+
+  void addHash(int64_t hash64) {
+    int64_t hash1 = hash64;
+    int64_t hash2 = static_cast<int64_t>(static_cast<uint64_t>(hash64) >> 32);
+
+    for (uint32_t i = 1; i <= numHashFunctions; ++i) {
+      int64_t combinedHash = hash1 + (i * hash2);
+      // hashcode should be positive, flip all the bits if it's negative
+      if (combinedHash < 0) {
+        combinedHash = ~combinedHash;
+      }
+      int64_t pos = combinedHash % numBits;
+      bitSet->set(pos);
+    }
+  }
+
+  bool testHash(int64_t hash64) {
+    int64_t hash1 = hash64;
+    int64_t hash2 = static_cast<int64_t>(static_cast<uint64_t>(hash64) >> 32);
+
+    for (uint32_t i = 1; i <= numHashFunctions; ++i) {
+      int64_t combinedHash = hash1 + (i * hash2);
+      // hashcode should be positive, flip all the bits if it's negative
+      if (combinedHash < 0) {
+        combinedHash = ~combinedHash;
+      }
+      int64_t pos = combinedHash % numBits;
+      if (!bitSet->get(pos)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  int64_t doubleToRawBits(double val) {
+    int64_t bits;
+    memcpy(&bits, &val, sizeof(bits));
+    return bits;
+  }
+
+ private:
+  int64_t numBits = 0;
+  uint32_t numHashFunctions = 0;
+  int64_t kExpectedEntry = 0;
+  MyBitSet::uptr bitSet = nullptr;
+  const double kDefaultFpp = 0.05;
+  dbcommon::Murmur3 murmur3;
+};
+
+}  // namespace storage
+
+#endif  // STORAGE_SRC_STORAGE_COMMON_BLOOM_FILTER_H_
diff --git a/depends/storage/src/storage/common/string.h b/depends/storage/src/storage/common/string.h
new file mode 100644
index 0000000..f7518e0
--- /dev/null
+++ b/depends/storage/src/storage/common/string.h
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_COMMON_STRING_H_
+#define STORAGE_SRC_STORAGE_COMMON_STRING_H_
+
+#include <cassert>
+#include <string>
+
+#include "dbcommon/utils/cutils.h"
+
+namespace storage {
+
+#define STRING_INIT_RESERVE_BYTES 1024
+
+class String {
+ public:
+  String() {
+    assert(reserved_ > 0);
+    data_ = dbcommon::cnmalloc(reserved_);
+  }
+
+  virtual ~String() { dbcommon::cnfree(data_); }
+
+  String &operator=(const String &) = delete;
+
+  void append(const char *value, uint32_t sz) {
+    enlarge(sz);
+    assert(size_ + sz <= reserved_);
+    memcpy(&data_[size_], value, sz);
+    size_ += sz;
+  }
+
+  void append(const char *value, uint32_t pos, uint32_t sz) {
+    enlarge(sz);
+    assert(size_ + sz <= reserved_);
+    memcpy(&data_[size_], value + pos, sz);
+    size_ += sz;
+  }
+
+  void appendChar(char value) {
+    enlarge(1);
+    assert(size_ + 1 <= reserved_);
+    *(reinterpret_cast<char *>(&data_[size_])) = value;
+    size_ += 1;
+  }
+
+  char *data() const { return data_; }
+
+  uint32_t size() const { return size_; }
+
+  void reset() { size_ = 0; }
+
+  std::string substr(uint32_t pos, uint32_t len) {
+    std::string str;
+    for (uint32_t i = pos, end = pos + len; i < end; ++i) str += data_[i];
+    return str;
+  }
+
+ private:
+  void enlarge(uint32_t needed) {
+    needed += size_;
+    if (needed <= reserved_) return;
+
+    uint32_t newLen = 2 * reserved_;
+    while (needed > newLen) newLen *= 2;
+
+    data_ = dbcommon::cnrealloc(data_, sizeof(char) * newLen);
+    reserved_ = newLen;
+  }
+
+ private:
+  char *data_ = nullptr;
+  uint32_t size_ = 0;
+  uint32_t reserved_ = STRING_INIT_RESERVE_BYTES;
+};
+}  // namespace storage
+
+#endif  // STORAGE_SRC_STORAGE_COMMON_STRING_H_
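
A minimal usage sketch for the String buffer above (illustrative only; it assumes
the dbcommon allocation helpers the header relies on are available):

    #include <iostream>

    #include "storage/common/string.h"

    int main() {
      storage::String buf;          // starts with 1024 bytes reserved
      buf.append("hello ", 6);      // append 6 raw bytes
      buf.appendChar('w');
      buf.append("world!", 1, 5);   // append 5 bytes starting at offset 1: "orld!"
      std::cout << buf.substr(0, buf.size()) << std::endl;  // prints "hello world!"
      buf.reset();                  // size back to 0, the buffer stays allocated
      return 0;
    }
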
diff --git a/depends/storage/src/storage/cwrapper/hdfs-file-system-c.cc b/depends/storage/src/storage/cwrapper/hdfs-file-system-c.cc
new file mode 100644
index 0000000..c468222
--- /dev/null
+++ b/depends/storage/src/storage/cwrapper/hdfs-file-system-c.cc
@@ -0,0 +1,486 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "storage/cwrapper/hdfs-file-system-c.h"
+
+#include <string>
+#include <vector>
+
+#include "dbcommon/filesystem/hdfs/hdfs-file-system.h"
+#include "dbcommon/log/logger.h"
+#include "dbcommon/utils/file-info.h"
+#include "dbcommon/utils/global.h"
+#include "dbcommon/utils/macro.h"
+
+extern "C" {
+
+struct FscHdfsFileC {
+  void *f;
+};
+
+struct FscHdfsFileInfoC {
+  void *fi;
+};
+
+struct FscHdfsFileInfoArrayC {
+  int size;
+  void **fiVec;
+};
+
+struct FscHdfsFileBlockLocationArrayC {
+  int size;
+  void **fblVec;
+};
+
+struct FscHdfsFileBlockLocationC {
+  void *bl;
+};
+
+struct FscHdfsFileSystemC {
+  void *fs;
+  CatchedError error;
+};
+
+#define FETCH_FILE_SYSTEM_HANDLE(ofs, ifs) \
+  dbcommon::FileSystem *ifs = static_cast<dbcommon::FileSystem *>((ofs)->fs);
+
+#define FETCH_HDFS_FILE_SYSTEM_HANDLE(ofs, ifs) \
+  dbcommon::HdfsFileSystem *ifs =               \
+      static_cast<dbcommon::HdfsFileSystem *>((ofs)->fs);
+
+#define FETCH_FILE_HANDLE(ofile, ifile) \
+  dbcommon::File *ifile = static_cast<dbcommon::File *>((ofile)->f);
+
+#define FETCH_FILE_INFO_HANDLE(ofi, ifi) \
+  dbcommon::FileInfo *ifi = static_cast<dbcommon::FileInfo *>((ofi)->fi);
+
+#define FETCH_FILE_BLOCK_LOCATION_HANDLE(obl, ibl) \
+  dbcommon::FileBlockLocation *ibl =               \
+      static_cast<dbcommon::FileBlockLocation *>((obl)->bl);
+
+void FscHdfsSetError(CatchedError *ce, int errCode, const char *reason) {
+  assert(ce != nullptr);
+  FscHdfsFreeErrorContent(ce); /* free the old one if it was filled already */
+  ce->errCode = errCode;
+  ce->errMessage = new char[strlen(reason) + 1]();
+  strcpy(ce->errMessage, reason); /* NOLINT */
+}
+
+void FscHdfsCloseFileC(FscHdfsFileSystemC *fs, FscHdfsFileC *f) {
+  int errCode;
+  std::string errMessage;
+  FETCH_FILE_HANDLE(f, ifile)
+  try {
+    ifile->close();
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+  }
+}
+
+FscHdfsFileSystemC *FscHdfsNewFileSystem(const char *namenode, uint16_t port) {
+  try {
+    std::string url("hdfs://");
+    url += namenode;
+    url += ":" + std::to_string(port);
+    dbcommon::FileSystem *fs = FSManager.get(url);
+
+    FscHdfsFileSystemC *result = new FscHdfsFileSystemC();
+    result->error.errCode = 0;
+    result->error.errMessage = nullptr;
+    result->fs = fs;
+    return result;
+  } catch (dbcommon::TransactionAbortException &e) {
+    return nullptr;
+  }
+}
+
+FscHdfsFileC *FscHdfsOpenFile(FscHdfsFileSystemC *fs, const char *path,
+                              int flags) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  try {
+    std::unique_ptr<dbcommon::File> file = ifs->open(path, flags);
+    FscHdfsFileC *result = new FscHdfsFileC();
+    result->f = file.release();
+    return result;
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+    return nullptr;
+  }
+}
+
+void FscHdfsSeekFile(FscHdfsFileSystemC *fs, FscHdfsFileC *f, uint64_t offset) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  FETCH_FILE_HANDLE(f, ifile)
+  try {
+    ifs->seek(ifile, offset);
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+  }
+}
+
+void FscHdfsRemovePath(FscHdfsFileSystemC *fs, const char *path) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  try {
+    ifs->remove(path);
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+  }
+}
+
+void FscHdfsRemovePathIfExists(FscHdfsFileSystemC *fs, const char *path) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  try {
+    ifs->removeIfExists(path);
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+  }
+}
+
+FscHdfsFileInfoC *FscHdfsGetFileInfo(FscHdfsFileSystemC *fs,
+                                     const char *fileName) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  try {
+    std::unique_ptr<dbcommon::FileInfo> finfo = ifs->getFileInfo(fileName);
+    FscHdfsFileInfoC *result = new FscHdfsFileInfoC();
+    result->fi = finfo.release();
+    return result;
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+    return nullptr;
+  }
+}
+
+int FscHdfsExistPath(FscHdfsFileSystemC *fs, const char *path) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  return ifs->exists(path) ? 1 : 0;
+}
+
+int64_t FscHdfsGetFileLength(FscHdfsFileSystemC *fs, const char *path) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  try {
+    return ifs->getFileLength(path);
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+    return -1;
+  }
+}
+
+char FscHdfsGetFileKind(FscHdfsFileSystemC *fs, const char *path) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  try {
+    return ifs->getFileKind(path);
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+    return 'U';
+  }
+}
+
+int FscHdfsReadFile(FscHdfsFileSystemC *fs, FscHdfsFileC *f, void *buf,
+                    int size) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  FETCH_FILE_HANDLE(f, ifile)
+  try {
+    return ifs->read(ifile, buf, size);
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+    return -1;
+  }
+}
+
+void FscHdfsWriteFile(FscHdfsFileSystemC *fs, FscHdfsFileC *f, void *buf,
+                      int size) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  FETCH_FILE_HANDLE(f, ifile)
+  try {
+    ifs->write(ifile, buf, size);
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+  }
+}
+
+void FscHdfsCreateDir(FscHdfsFileSystemC *fs, const char *path) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  try {
+    ifs->createDir(path);
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+  }
+}
+
+int FscHdfsExistInsertPath(FscHdfsFileSystemC *fs, const char *path) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  std::string fullPath(path);
+  fullPath += INSERT_HIDDEN_DIR;
+  return ifs->exists(fullPath.c_str()) ? 1 : 0;
+}
+
+void FscHdfsCreateInsertDir(FscHdfsFileSystemC *fs, const char *path) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  try {
+    std::string fullPath(path);
+    fullPath += INSERT_HIDDEN_DIR;
+    ifs->createDir(fullPath.c_str());
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+  }
+}
+
+FscHdfsFileInfoArrayC *FscHdfsDirPath(FscHdfsFileSystemC *fs,
+                                      const char *path) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  try {
+    std::vector<std::unique_ptr<dbcommon::FileInfo> > finfovector =
+        ifs->dir(path);
+    FscHdfsFileInfoArrayC *result = new FscHdfsFileInfoArrayC();
+    result->size = finfovector.size();
+    result->fiVec = new void *[result->size];
+    for (int i = 0; i < result->size; ++i) {
+      dbcommon::FileInfo *newfi = new dbcommon::FileInfo();
+      FscHdfsFileInfoC *newfic = new FscHdfsFileInfoC();
+      newfic->fi = newfi;
+      result->fiVec[i] = newfic;
+
+      *newfi = *(finfovector[i]);
+    }
+    return result;
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+    return nullptr;
+  }
+}
+
+void FscHdfsChmodPath(FscHdfsFileSystemC *fs, const char *path, int mode) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  try {
+    ifs->chmod(path, mode);
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+  }
+}
+
+int64_t FscHdfsTellFile(FscHdfsFileSystemC *fs, FscHdfsFileC *f) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  FETCH_FILE_HANDLE(f, ifile)
+  try {
+    return ifs->tell(ifile);
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+    return -1;
+  }
+}
+
+FscHdfsFileBlockLocationArrayC *FscHdfsGetPathFileBlockLocation(
+    FscHdfsFileSystemC *fs, const char *path, int64_t start, int64_t length) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  try {
+    std::vector<std::unique_ptr<dbcommon::FileBlockLocation> >  // NOLINT
+        fblvector = ifs->getFileBlockLocation(path, start, length);
+    FscHdfsFileBlockLocationArrayC *result =
+        new FscHdfsFileBlockLocationArrayC();
+    result->size = fblvector.size();
+    result->fblVec = new void *[result->size];
+    for (int i = 0; i < result->size; ++i) {
+      FscHdfsFileBlockLocationC *newblc = new FscHdfsFileBlockLocationC();
+      dbcommon::FileBlockLocation *newbl = new dbcommon::FileBlockLocation();
+      newblc->bl = newbl;
+      result->fblVec[i] = newblc;
+
+      newbl->corrupt = fblvector[i]->corrupt;
+      newbl->length = fblvector[i]->length;
+      newbl->offset = fblvector[i]->offset;
+      newbl->hosts = fblvector[i]->hosts;
+      newbl->names = fblvector[i]->names;
+      newbl->ports = fblvector[i]->ports;
+      newbl->topoPaths = fblvector[i]->topoPaths;
+    }
+    return result;
+  } catch (dbcommon::TransactionAbortException &e) {
+    FscHdfsSetError(&(fs->error), e.errCode(), e.what());
+    return nullptr;
+  }
+}
+
+void FscHdfsSetFileSystemBlockSize(FscHdfsFileSystemC *fs, int size) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  ifs->setBlockSize(size);
+}
+
+int FscHdfsGetFileSystemBlockSize(FscHdfsFileSystemC *fs) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  return ifs->getBlockSize();
+}
+
+const char *FscHdfsGetFileSystemAddress(FscHdfsFileSystemC *fs) {
+  FETCH_HDFS_FILE_SYSTEM_HANDLE(fs, ifs)
+  return ifs->getFileSystemNameNodeAddr().c_str();
+}
+
+uint16_t FscHdfsGetFileSystemPort(FscHdfsFileSystemC *fs) {
+  FETCH_HDFS_FILE_SYSTEM_HANDLE(fs, ifs)
+  return ifs->getFileSystemPort();
+}
+
+int FscHdfsHasErrorRaised(FscHdfsFileSystemC *fs) {
+  FETCH_FILE_SYSTEM_HANDLE(fs, ifs)
+  return fs->error.errCode != 0 ? 1 : 0;
+}
+
+void FscHdfsFreeFileSystemC(FscHdfsFileSystemC **fs) {
+  if (*fs == nullptr) return;
+  FscHdfsFreeErrorContent(&((*fs)->error));
+  delete *fs;
+  *fs = nullptr;
+}
+
+void FscHdfsFreeFileC(FscHdfsFileC **f) {
+  if (*f == nullptr) return;
+  FETCH_FILE_HANDLE((*f), ifile)
+  delete ifile;
+  delete *f;
+  *f = nullptr;
+}
+
+void FscHdfsFreeFileInfoArrayC(FscHdfsFileInfoArrayC **fiArray) {
+  if (*fiArray == nullptr) return;
+  for (int i = 0; i < (*fiArray)->size; ++i) {
+    FscHdfsFileInfoC *pfic =
+        static_cast<FscHdfsFileInfoC *>((*fiArray)->fiVec[i]);
+    dbcommon::FileInfo *pfi = static_cast<dbcommon::FileInfo *>(pfic->fi);
+    delete pfi;
+    delete pfic;
+  }
+  delete *fiArray;
+  *fiArray = nullptr;
+}
+
+void FscHdfsFreeFileBlockLocationArrayC(
+    FscHdfsFileBlockLocationArrayC **fblArray) {
+  if (*fblArray == nullptr) return;
+  for (int i = 0; i < (*fblArray)->size; ++i) {
+    FscHdfsFileBlockLocationC *fblc =
+        static_cast<FscHdfsFileBlockLocationC *>((*fblArray)->fblVec[i]);
+    dbcommon::FileBlockLocation *fbl =
+        static_cast<dbcommon::FileBlockLocation *>(fblc->bl);
+    delete fbl;
+    delete fblc;
+  }
+  delete *fblArray;
+  *fblArray = nullptr;
+}
+
+void FscHdfsFreeErrorContent(CatchedError *ce) {
+  assert(ce != nullptr);
+  if (ce->errMessage != nullptr) {
+    delete[] ce->errMessage;
+  }
+}
+
+FscHdfsFileInfoC *FscHdfsGetFileInfoFromArray(FscHdfsFileInfoArrayC *fia,
+                                              int index) {
+  if (index < 0 || index >= fia->size) {
+    return nullptr;
+  }
+  return static_cast<FscHdfsFileInfoC *>(fia->fiVec[index]);
+}
+
+const char *FscHdfsGetFileInfoName(FscHdfsFileInfoC *fi) {
+  FETCH_FILE_INFO_HANDLE(fi, ifi)
+  return ifi->name.c_str();
+}
+
+int64_t FscHdfsGetFileInfoLength(FscHdfsFileInfoC *fi) {
+  FETCH_FILE_INFO_HANDLE(fi, ifi)
+  return ifi->size;
+}
+
+FscHdfsFileBlockLocationC *FscHdfsGetFileBlockLocationFromArray(
+    FscHdfsFileBlockLocationArrayC *bla, int index) {
+  if (index < 0 || index >= bla->size) {
+    return nullptr;
+  }
+  return static_cast<FscHdfsFileBlockLocationC *>(bla->fblVec[index]);
+}
+
+int FscHdfsGetFileBlockLocationArraySize(FscHdfsFileBlockLocationArrayC *bla) {
+  return bla->size;
+}
+
+int FscHdfsGetFileBlockLocationNNodes(FscHdfsFileBlockLocationC *bl) {
+  FETCH_FILE_BLOCK_LOCATION_HANDLE(bl, ibl)
+  return ibl->hosts.size();
+}
+
+int64_t FscHdfsGetFileBlockLocationOffset(FscHdfsFileBlockLocationC *bl) {
+  FETCH_FILE_BLOCK_LOCATION_HANDLE(bl, ibl)
+  return ibl->offset;
+}
+int64_t FscHdfsGetFileBlockLocationLength(FscHdfsFileBlockLocationC *bl) {
+  FETCH_FILE_BLOCK_LOCATION_HANDLE(bl, ibl)
+  return ibl->length;
+}
+
+int FscHdfsGetFileBlockLocationCorrupt(FscHdfsFileBlockLocationC *bl) {
+  FETCH_FILE_BLOCK_LOCATION_HANDLE(bl, ibl)
+  return ibl->corrupt;
+}
+
+const char *FscHdfsGetFileBlockLocationNodeHost(FscHdfsFileBlockLocationC *bl,
+                                                int index) {
+  FETCH_FILE_BLOCK_LOCATION_HANDLE(bl, ibl)
+  return ibl->hosts[index].c_str();
+}
+const char *FscHdfsGetFileBlockLocationNodeName(FscHdfsFileBlockLocationC *bl,
+                                                int index) {
+  FETCH_FILE_BLOCK_LOCATION_HANDLE(bl, ibl)
+  return ibl->names[index].c_str();
+}
+const char *FscHdfsGetFileBlockLocationNodeTopoPath(
+    FscHdfsFileBlockLocationC *bl, int index) {
+  FETCH_FILE_BLOCK_LOCATION_HANDLE(bl, ibl)
+  return ibl->topoPaths[index].c_str();
+}
+
+void FscHdfsFreeString(char **pstr) {
+  delete[] *pstr;
+  *pstr = nullptr;
+}
+
+CatchedError *FscHdfsGetFileSystemError(FscHdfsFileSystemC *fs) {
+  return &(fs->error);
+}
+
+void SetToken(const char *tokenkey, const char *token) {
+  if (token) {
+    std::string Token(token);
+    std::string TokenKey(tokenkey);
+    FSManager.setTokenMap(TokenKey, Token);
+  }
+}
+
+void SetCcname(const char *ccname) {
+  if (ccname) {
+    std::string Ccname(ccname);
+    FSManager.setCcname(Ccname);
+  }
+}
+void cleanup_FSManager() {
+  FSManager.clearFsMap();
+  FSManager.clearFsTokenMap();
+}
+}
diff --git a/depends/storage/src/storage/cwrapper/hdfs-file-system-c.h b/depends/storage/src/storage/cwrapper/hdfs-file-system-c.h
new file mode 100644
index 0000000..13707c4
--- /dev/null
+++ b/depends/storage/src/storage/cwrapper/hdfs-file-system-c.h
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef STORAGE_SRC_STORAGE_CWRAPPER_HDFS_FILE_SYSTEM_C_H_
+#define STORAGE_SRC_STORAGE_CWRAPPER_HDFS_FILE_SYSTEM_C_H_
+
+#include <fcntl.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct FscHdfsFileC;
+struct FscHdfsFileInfoC;
+struct FscHdfsFileInfoArrayC;
+struct FscHdfsFileBlockLocationC;
+struct FscHdfsFileBlockLocationArrayC;
+struct FscHdfsFileSystemC;
+
+typedef struct FscHdfsFileC FscHdfsFileC;
+typedef struct FscHdfsFileInfoC FscHdfsFileInfoC;
+typedef struct FscHdfsFileInfoArrayC FscHdfsFileInfoArrayC;
+typedef struct FscHdfsFileBlockLocationC FscHdfsFileBlockLocationC;
+typedef struct FscHdfsFileBlockLocationArrayC FscHdfsFileBlockLocationArrayC;
+typedef struct FscHdfsFileSystemC FscHdfsFileSystemC;
+
+typedef struct CatchedError {
+  int errCode;
+  char *errMessage;
+} CatchedError;
+
+// Set error
+void FscHdfsSetError(CatchedError *ce, int errCode, const char *reason);
+CatchedError *FscHdfsGetFileSystemError(FscHdfsFileSystemC *fs);
+
+// File APIs
+void FscHdfsCloseFileC(FscHdfsFileSystemC *fs, FscHdfsFileC *f);
+
+// File system APIs
+FscHdfsFileSystemC *FscHdfsNewFileSystem(const char *namenode, uint16_t port);
+
+FscHdfsFileC *FscHdfsOpenFile(FscHdfsFileSystemC *fs, const char *path,
+                              int flags);
+void FscHdfsSeekFile(FscHdfsFileSystemC *fs, FscHdfsFileC *f, uint64_t offset);
+void FscHdfsRemovePath(FscHdfsFileSystemC *fs, const char *path);
+void FscHdfsRemovePathIfExists(FscHdfsFileSystemC *fs, const char *path);
+FscHdfsFileInfoC *FscHdfsGetFileInfo(FscHdfsFileSystemC *fs,
+                                     const char *fileName);
+int FscHdfsExistPath(FscHdfsFileSystemC *fs, const char *path);
+int64_t FscHdfsGetFileLength(FscHdfsFileSystemC *fs, const char *path);
+int FscHdfsReadFile(FscHdfsFileSystemC *fs, FscHdfsFileC *f, void *buf,
+                    int size);
+void FscHdfsWriteFile(FscHdfsFileSystemC *fs, FscHdfsFileC *f, void *buf,
+                      int size);
+void FscHdfsCreateDir(FscHdfsFileSystemC *fs, const char *path);
+// a special interface to create a dir together with a hidden ".tmp" subdir
+int FscHdfsExistInsertPath(FscHdfsFileSystemC *fs, const char *path);
+void FscHdfsCreateInsertDir(FscHdfsFileSystemC *fs, const char *path);
+FscHdfsFileInfoArrayC *FscHdfsDirPath(FscHdfsFileSystemC *fs, const char *path);
+void FscHdfsChmodPath(FscHdfsFileSystemC *fs, const char *path, int mode);
+int64_t FscHdfsTellFile(FscHdfsFileSystemC *fs, FscHdfsFileC *f);
+FscHdfsFileBlockLocationArrayC *FscHdfsGetPathFileBlockLocation(
+    FscHdfsFileSystemC *fs, const char *path, int64_t start, int64_t length);
+void FscHdfsSetFileSystemBlockSize(FscHdfsFileSystemC *fs, int size);
+int FscHdfsGetFileSystemBlockSize(FscHdfsFileSystemC *fs);
+
+const char *FscHdfsGetFileSystemAddress(FscHdfsFileSystemC *fs);
+uint16_t FscHdfsGetFileSystemPort(FscHdfsFileSystemC *fs);
+
+FscHdfsFileInfoC *FscHdfsGetFileInfoFromArray(FscHdfsFileInfoArrayC *fia,
+                                              int index);
+const char *FscHdfsGetFileInfoName(FscHdfsFileInfoC *fi);
+int64_t FscHdfsGetFileInfoLength(FscHdfsFileInfoC *fi);
+
+FscHdfsFileBlockLocationC *FscHdfsGetFileBlockLocationFromArray(
+    FscHdfsFileBlockLocationArrayC *bla, int index);
+
+int FscHdfsGetFileBlockLocationArraySize(FscHdfsFileBlockLocationArrayC *bla);
+
+int FscHdfsGetFileBlockLocationNNodes(FscHdfsFileBlockLocationC *bl);
+int64_t FscHdfsGetFileBlockLocationOffset(FscHdfsFileBlockLocationC *bl);
+int64_t FscHdfsGetFileBlockLocationLength(FscHdfsFileBlockLocationC *bl);
+int FscHdfsGetFileBlockLocationCorrupt(FscHdfsFileBlockLocationC *bl);
+const char *FscHdfsGetFileBlockLocationNodeHost(FscHdfsFileBlockLocationC *bl,
+                                                int index);
+const char *FscHdfsGetFileBlockLocationNodeName(FscHdfsFileBlockLocationC *bl,
+                                                int index);
+const char *FscHdfsGetFileBlockLocationNodeTopoPath(
+    FscHdfsFileBlockLocationC *bl, int index);
+
+int FscHdfsHasErrorRaised(FscHdfsFileSystemC *fs);
+
+// Additional free/delete APIs to help release memory allocated by this wrapper
+void FscHdfsFreeFileSystemC(FscHdfsFileSystemC **fs);
+void FscHdfsFreeFileC(FscHdfsFileC **f);
+void FscHdfsFreeFileInfoArrayC(FscHdfsFileInfoArrayC **fiArray);
+void FscHdfsFreeFileBlockLocationArrayC(
+    FscHdfsFileBlockLocationArrayC **fblArray);
+void FscHdfsFreeErrorContent(CatchedError *ce);
+void SetToken(const char *tokenkey, const char *token);
+void SetCcname(const char *ccname);
+void cleanup_FSManager();
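+
+// A minimal usage sketch of this wrapper (illustrative only: the namenode
+// address, path, and buffer below are hypothetical, and error handling is
+// reduced to a single FscHdfsHasErrorRaised check):
+//
+//   FscHdfsFileSystemC *fs = FscHdfsNewFileSystem("localhost", 8020);
+//   FscHdfsFileC *f = FscHdfsOpenFile(fs, "/tmp/demo.dat", O_RDONLY);
+//   char buf[4096];
+//   int nread = FscHdfsReadFile(fs, f, buf, sizeof(buf));
+//   if (FscHdfsHasErrorRaised(fs)) {
+//     CatchedError *err = FscHdfsGetFileSystemError(fs);
+//     /* report err->errCode and err->errMessage */
+//   }
+//   FscHdfsCloseFileC(fs, f);
+//   FscHdfsFreeFileC(&f);
+//   FscHdfsFreeFileSystemC(&fs);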
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // STORAGE_SRC_STORAGE_CWRAPPER_HDFS_FILE_SYSTEM_C_H_
diff --git a/depends/storage/src/storage/cwrapper/orc-format-c.cc b/depends/storage/src/storage/cwrapper/orc-format-c.cc
new file mode 100644
index 0000000..c215f96
--- /dev/null
+++ b/depends/storage/src/storage/cwrapper/orc-format-c.cc
@@ -0,0 +1,638 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "storage/cwrapper/orc-format-c.h"
+
+#include <uuid/uuid.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "dbcommon/common/vector-transformer.h"
+#include "dbcommon/common/vector/decimal-vector.h"
+#include "dbcommon/common/vector/timestamp-vector.h"
+#include "dbcommon/filesystem/file-system.h"
+#include "dbcommon/function/decimal-function.h"
+#include "dbcommon/function/typecast-func.cg.h"
+#include "dbcommon/type/date.h"
+#include "dbcommon/type/decimal.h"
+#include "dbcommon/type/type-kind.h"
+#include "dbcommon/utils/global.h"
+#include "dbcommon/utils/url.h"
+
+#include "storage/format/format.h"
+#include "storage/format/orc/orc-format.h"
+
+#include "univplan/univplanbuilder/univplanbuilder-scan-task.h"
+
+#define NUMERIC_POS 0x0000
+#define NUMERIC_NEG 0x4000
+#define DEC_DIGITS 4
+#define NUMERIC_DSCALE_MASK 0x3FF
+#define NUMERIC_HDRSZ (sizeof(int32_t) + sizeof(uint16_t) + sizeof(int16_t))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static void ORCFormatSetErrorORCFormatC(ORCFormatCatchedError *ce, int errCode,
+                                        const char *errMsg);
+
+typedef struct OrcColumnReader {
+  dbcommon::TypeKind type;
+  const char *value;
+  const bool *nulls;
+  const uint64_t *lens;
+  std::unique_ptr<dbcommon::ByteBuffer> valBuffer;
+} OrcColumnReader;
+
+struct ORCFormatC {
+  std::unique_ptr<storage::ORCFormat> orcFormat;  // NOLINT
+  dbcommon::URL::uptr url;
+  dbcommon::Parameters params;
+  dbcommon::TupleDesc desc;
+  ORCFormatCatchedError error;
+  std::vector<bool> columnsToRead;
+  univplan::UnivPlanScanFileSplitListList splits;
+  dbcommon::TupleBatch::uptr tb;
+  std::string insertFileName;
+
+  bool needNewTupleBatch;
+  uint64_t rowRead;
+  uint64_t rowCount;
+  std::vector<std::unique_ptr<OrcColumnReader>> columnReaders;
+  std::vector<uint32_t> colToReadIds;
+};
+
+typedef struct NumericTransData {
+  int32_t varlen;        // total size counted in byte
+  int16_t weight;        // number of base-10000 digits before the decimal point, minus one
+  uint16_t sign_dscale;  // sign and scale
+  int16_t digits[0];
+} NumericTransData;
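+
+// Worked example of the layout above (illustrative only): the decimal value
+// 12.345 with scale 3 is padded to 12.3450 so the fraction fills a whole
+// base-10000 digit, and is transferred as
+//   varlen      = NUMERIC_HDRSZ + 2 * sizeof(int16_t) = 12 bytes
+//   weight      = 0 (number of base-10000 integral digits minus one)
+//   sign_dscale = NUMERIC_POS | 3
+//   digits      = {12, 3450}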
+
+ORCFormatC *ORCFormatNewORCFormatC(const char *tableOptions, int segno) {
+  ORCFormatC *instance = new ORCFormatC();
+  univplan::UNIVPLANFORMATTYPE type = univplan::UNIVPLANFORMATTYPE::ORC_FORMAT;
+
+  instance->params.set("table.options", tableOptions);
+  instance->orcFormat.reset(new storage::ORCFormat(&(instance->params)));
+  instance->orcFormat->setFileSystemManager(&FSManager);
+  instance->tb = nullptr;
+  instance->url = nullptr;
+  instance->error.errCode = ERRCODE_SUCCESSFUL_COMPLETION;
+  instance->insertFileName = "/" + std::to_string(segno + 1) + "_";
+  return instance;
+}
+
+void ORCFormatBeginORCFormatC(ORCFormatC *fmt, ORCFormatFileSplit *splits,
+                              int numSplits, bool *columnsToRead,
+                              char **columnName, int *columnDatatype,
+                              uint64_t *columnDatatypeMod, int numColumns) {
+  try {
+    fmt->tb = nullptr;
+    fmt->needNewTupleBatch = true;
+    for (int i = 0; i < numColumns; ++i) {
+      fmt->columnsToRead.push_back(columnsToRead[i]);
+      fmt->desc.add(columnName[i],
+                    (static_cast<dbcommon::TypeKind>(columnDatatype[i])),
+                    columnDatatypeMod[i]);
+      if (columnsToRead[i]) {
+        std::unique_ptr<OrcColumnReader> columnReader(new OrcColumnReader);
+        columnReader->type = static_cast<dbcommon::TypeKind>(columnDatatype[i]);
+        switch (columnReader->type) {
+          case dbcommon::TypeKind::STRINGID:
+          case dbcommon::TypeKind::CHARID:
+          case dbcommon::TypeKind::VARCHARID:
+          case dbcommon::TypeKind::BINARYID:
+          case dbcommon::TypeKind::TIMESTAMPID:
+          case dbcommon::TypeKind::TIMESTAMPTZID:
+          case dbcommon::TypeKind::DECIMALID:
+            columnReader->valBuffer.reset(new dbcommon::ByteBuffer(true));
+            columnReader->valBuffer->reserve(DEFAULT_RESERVED_SIZE_OF_STRING *
+                                             DEFAULT_NUMBER_TUPLES_PER_BATCH);
+            break;
+          default:
+            columnReader->valBuffer = nullptr;
+            break;
+        }
+        fmt->columnReaders.push_back(std::move(columnReader));
+        fmt->colToReadIds.push_back(i);
+      }
+    }
+
+    // create one scan task to contain all splits
+    univplan::UnivPlanBuilderScanTask scanTaskBld;
+    // add all splits into scan task
+    for (int j = 0; j < numSplits; ++j) {
+      scanTaskBld.addScanFileSplit(splits[j].fileName, splits[j].start,
+                                   splits[j].len, -1, -1);  // no rangeid, rgid
+    }
+    // build scan task by transferring tb from this builder to the fmt instance
+    std::unique_ptr<univplan::UnivPlanScanFileSplitListTb> newScanTask(
+        new univplan::UnivPlanScanFileSplitListTb(
+            std::move(scanTaskBld.releaseSplitsTb())));
+    fmt->splits.push_back(std::move(newScanTask));
+
+    fmt->orcFormat->beginScan(&(fmt->splits), &(fmt->desc),
+                              &(fmt->columnsToRead), nullptr, nullptr, false);
+  } catch (dbcommon::TransactionAbortException &e) {
+    ORCFormatSetErrorORCFormatC(&(fmt->error), e.errCode(), e.what());
+  }
+}
+
+void ORCFormatRescanORCFormatC(ORCFormatC *fmt) {
+  try {
+    fmt->orcFormat->reScan();
+  } catch (dbcommon::TransactionAbortException &e) {
+    ORCFormatSetErrorORCFormatC(&(fmt->error), e.errCode(), e.what());
+  }
+}
+
+void ORCFormatEndORCFormatC(ORCFormatC *fmt) {
+  try {
+    fmt->orcFormat->endScan();
+  } catch (dbcommon::TransactionAbortException &e) {
+    ORCFormatSetErrorORCFormatC(&(fmt->error), e.errCode(), e.what());
+  }
+}
+
+void ORCFormatBeginInsertORCFormatC(ORCFormatC *fmt, const char *dirFullPath,
+                                    char **columnName, int *columnDatatype,
+                                    uint64_t *columnDatatypeMod,
+                                    int numColumns) {
+  try {
+    fmt->tb = nullptr;
+    for (int i = 0; i < numColumns; ++i) {
+      fmt->desc.add(columnName[i],
+                    (static_cast<dbcommon::TypeKind>(columnDatatype[i])),
+                    columnDatatypeMod[i]);
+    }
+
+    std::string dirFullInsertPath(dirFullPath);
+    dirFullInsertPath += INSERT_HIDDEN_DIR;
+    fmt->url.reset(new dbcommon::URL(dirFullInsertPath));
+    dbcommon::FileSystem *fs = FSManager.get(dirFullPath);
+    std::string targetPath = fmt->url->getPath();
+    std::string targetRawPath = fmt->url->getRawString();
+    if (!fs->exists(targetPath.c_str())) {
+      LOG_ERROR(ERRCODE_DATA_EXCEPTION, "no data directory found: %s",
+                targetPath.c_str());
+    }
+    // Generate filename for current insertion.
+    uuid_t uuid;
+    char buf[1024];
+    uuid_generate_time(uuid);
+    uuid_unparse(uuid, buf);
+    fmt->insertFileName.append(buf, strlen(buf));
+
+    fmt->orcFormat->beginInsert(targetRawPath + fmt->insertFileName, fmt->desc);
+  } catch (dbcommon::TransactionAbortException &e) {
+    ORCFormatSetErrorORCFormatC(&(fmt->error), e.errCode(), e.what());
+  }
+}
+
+void ORCFormatInsertORCFormatC(ORCFormatC *fmt, int *datatypes, char **values,
+                               uint64_t *lens, unsigned char **nullBitmap,
+                               int32_t **dims, bool *isNull) {
+  try {
+    if (fmt->tb == nullptr)
+      fmt->tb.reset(new dbcommon::TupleBatch(fmt->desc, true));
+
+    dbcommon::TupleBatchWriter &writers = fmt->tb->getTupleBatchWriter();
+    int natts = fmt->desc.getNumOfColumns();
+
+    for (int i = 0; i < natts; ++i) {
+      dbcommon::TypeKind datatype =
+          (static_cast<dbcommon::TypeKind>(datatypes[i]));
+      switch (datatype) {
+        case dbcommon::TypeKind::BOOLEANID:
+          writers[i]->append(reinterpret_cast<char *>(values[i]), sizeof(bool),
+                             isNull[i]);
+          break;
+
+        case dbcommon::TypeKind::TINYINTID:
+          writers[i]->append(reinterpret_cast<char *>(values[i]),
+                             sizeof(int8_t), isNull[i]);
+          break;
+
+        case dbcommon::TypeKind::SMALLINTID:
+          writers[i]->append(reinterpret_cast<char *>(values[i]),
+                             sizeof(int16_t), isNull[i]);
+          break;
+
+        case dbcommon::TypeKind::INTID:
+        case dbcommon::TypeKind::DATEID:
+          writers[i]->append(reinterpret_cast<char *>(values[i]),
+                             sizeof(int32_t), isNull[i]);
+          break;
+
+        case dbcommon::TypeKind::BIGINTID:
+        case dbcommon::TypeKind::TIMEID:
+          writers[i]->append(reinterpret_cast<char *>(values[i]),
+                             sizeof(int64_t), isNull[i]);
+          break;
+
+        case dbcommon::TypeKind::FLOATID:
+          writers[i]->append(reinterpret_cast<char *>(values[i]), sizeof(float),
+                             isNull[i]);
+          break;
+
+        case dbcommon::TypeKind::DOUBLEID:
+          writers[i]->append(reinterpret_cast<char *>(values[i]),
+                             sizeof(double), isNull[i]);
+          break;
+
+        case dbcommon::TypeKind::CHARID:
+        case dbcommon::TypeKind::VARCHARID:
+        case dbcommon::TypeKind::STRINGID:
+        case dbcommon::TypeKind::BINARYID:
+        case dbcommon::TypeKind::DECIMALID:
+          writers[i]->append(reinterpret_cast<char *>(values[i]), isNull[i]);
+          break;
+
+        case dbcommon::TypeKind::TIMESTAMPID:
+        case dbcommon::TypeKind::TIMESTAMPTZID:
+          writers[i]->append(reinterpret_cast<char *>(values[i]),
+                             sizeof(int64_t) + sizeof(int64_t), isNull[i]);
+          break;
+
+        case dbcommon::TypeKind::SMALLINTARRAYID:
+        case dbcommon::TypeKind::INTARRAYID:
+        case dbcommon::TypeKind::BIGINTARRAYID:
+        case dbcommon::TypeKind::FLOATARRAYID:
+        case dbcommon::TypeKind::DOUBLEARRAYID: {
+          dbcommon::ListVector *lwriter =
+              reinterpret_cast<dbcommon::ListVector *>(writers[i].get());
+          lwriter->append(reinterpret_cast<char *>(values[i]), lens[i],
+                          nullBitmap[i], dims[i], isNull[i], true);
+          break;
+        }
+        case dbcommon::TypeKind::INVALIDTYPEID:
+          LOG_ERROR(ERRCODE_DATA_EXCEPTION, "data type with id %d is invalid",
+                    static_cast<int>(datatype));
+
+        default:
+          LOG_ERROR(ERRCODE_DATA_EXCEPTION,
+                    "data type with id %d is not supported yet",
+                    static_cast<int>(datatype));
+          break;
+      }
+    }
+
+    fmt->tb->incNumOfRows(1);
+    if (fmt->tb->getNumOfRows() >= storage::Format::kTuplesPerBatch) {
+      fmt->orcFormat->doInsert(std::move(fmt->tb));
+      fmt->tb = nullptr;
+    }
+  } catch (dbcommon::TransactionAbortException &e) {
+    ORCFormatSetErrorORCFormatC(&(fmt->error), e.errCode(), e.what());
+  }
+}
+
+void ORCFormatEndInsertORCFormatC(ORCFormatC *fmt) {
+  try {
+    if (fmt->tb) fmt->orcFormat->doInsert(std::move(fmt->tb));  // NOLINT
+    fmt->orcFormat->endInsert();
+    dbcommon::FileSystem *fs = FSManager.get(fmt->url->getRawString());
+    fs->rename((fmt->url->getPath() + fmt->insertFileName).c_str(),
+               (fmt->url->getPath() + "/.." + fmt->insertFileName).c_str());
+  } catch (dbcommon::TransactionAbortException &e) {
+    ORCFormatSetErrorORCFormatC(&(fmt->error), e.errCode(), e.what());
+  }
+}
+
+void ORCFormatFreeORCFormatC(ORCFormatC **fmt) {
+  if (*fmt == nullptr) return;
+  delete *fmt;
+  *fmt = nullptr;
+}
+
+ORCFormatCatchedError *ORCFormatGetErrorORCFormatC(ORCFormatC *fmt) {
+  return &(fmt->error);
+}
+
+void ORCFormatSetErrorORCFormatC(ORCFormatCatchedError *ce, int errCode,
+                                 const char *errMsg) {
+  assert(ce != nullptr);
+  ce->errCode = errCode;
+  snprintf(ce->errMessage, sizeof(ce->errMessage), "%s", errMsg);
+}
+
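+// Flatten a BytesVector into reader->valBuffer as a sequence of
+// [uint32 length][raw bytes] entries, skipping null rows; reader->lens and
+// reader->nulls keep pointing at the vector's own arrays.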
+static void textRelatedGetValueBuffer(ORCFormatC *fmt, dbcommon::BytesVector *v,
+                                      OrcColumnReader *reader) {
+  bool hasNull = v->hasNullValue();
+  const uint64_t *lens = v->getLengths();
+  const char **valPtrs = v->getValPtrs();
+  reader->valBuffer->clear();
+  reader->lens = lens;
+  if (hasNull) {
+    const bool *nulls = v->getNullBuffer()->getBools();
+    for (uint64_t i = 0; i < fmt->rowCount; ++i) {
+      if (!nulls[i]) {
+        uint32_t len = lens[i];
+        reader->valBuffer->append(len);
+        reader->valBuffer->append(valPtrs[i], len);
+      }
+    }
+    reader->nulls = nulls;
+  } else {
+    for (uint64_t i = 0; i < fmt->rowCount; ++i) {
+      uint32_t len = lens[i];
+      reader->valBuffer->append(len);
+      reader->valBuffer->append(valPtrs[i], len);
+    }
+    reader->nulls = nullptr;
+  }
+  reader->value = reader->valBuffer->data();
+}
+
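+// Flatten a TimestampVector into reader->valBuffer as one int64 per non-null
+// row, combining each (seconds, nanoseconds) pair into the microsecond value
+// expected by the consumer of this wrapper.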
+static void timestampGetValueBuffer(ORCFormatC *fmt,
+                                    dbcommon::TimestampVector *v,
+                                    OrcColumnReader *reader) {
+  bool hasNull = v->hasNullValue();
+  const char **valPtrs = v->getValPtrs();
+  const int64_t *second = reinterpret_cast<const int64_t *>(v->getValue());
+  const int64_t *nanosecond =
+      reinterpret_cast<const int64_t *>(v->getNanoseconds());
+  reader->valBuffer->clear();
+  if (hasNull) {
+    const bool *nulls = v->getNullBuffer()->getBools();
+    for (uint64_t i = 0; i < fmt->rowCount; ++i) {
+      if (!nulls[i]) {
+        int64_t val = (second[i] - TIMESTAMP_EPOCH_JDATE) * 1000000 +
+                      nanosecond[i] / 1000;
+        reader->valBuffer->append(val);
+      }
+    }
+    reader->nulls = nulls;
+  } else {
+    for (uint64_t i = 0; i < fmt->rowCount; ++i) {
+      int64_t val =
+          (second[i] - TIMESTAMP_EPOCH_JDATE) * 1000000 + nanosecond[i] / 1000;
+      reader->valBuffer->append(val);
+    }
+    reader->nulls = nullptr;
+  }
+  reader->value = reader->valBuffer->data();
+}
+
+static void decimalGetValueBuffer(dbcommon::DecimalVector *srcVector,
+                                  OrcColumnReader *reader) {
+  dbcommon::DecimalVectorRawData src(srcVector);
+
+  auto convertNumericTranData = [&](uint64_t plainIdx) {
+    NumericTransData numeric;
+    dbcommon::Int128 data(src.hightbits[plainIdx], src.lowbits[plainIdx]);
+
+    numeric.sign_dscale = NUMERIC_POS;
+    if (data.isNegative()) {
+      numeric.sign_dscale = NUMERIC_NEG;
+      data = data.negate();
+    }
+
+    // Pad the fractional part with zeros so it fills whole base-10000 (int16_t) digits
+    int16_t scaleDigitCount = src.scales[plainIdx];
+    int16_t paddingDigitCount =
+        (DEC_DIGITS - scaleDigitCount % DEC_DIGITS) % DEC_DIGITS;
+    int16_t significantDigitCount = data.getNumOfDigit();
+
+    bool isPaddingSuffix = significantDigitCount > scaleDigitCount;
+    int16_t totalDigitCount = isPaddingSuffix
+                                  ? significantDigitCount + paddingDigitCount
+                                  : scaleDigitCount + paddingDigitCount;
+
+    numeric.sign_dscale |= (scaleDigitCount & NUMERIC_DSCALE_MASK);
+    numeric.weight = isPaddingSuffix ? (totalDigitCount - scaleDigitCount -
+                                        paddingDigitCount + (DEC_DIGITS - 1)) /
+                                               DEC_DIGITS -
+                                           1
+                                     : -1;
+    numeric.varlen =
+        NUMERIC_HDRSZ +
+        ((totalDigitCount + DEC_DIGITS - 1) / DEC_DIGITS) * sizeof(int16_t);
+
+    // Reserve buffer space for this numeric
+    reader->valBuffer->resize(reader->valBuffer->size() + numeric.varlen);
+
+    // Fill header
+    *reinterpret_cast<NumericTransData *>(reader->valBuffer->tail() -
+                                          numeric.varlen) = numeric;
+
+    // Fill digits
+    __int128_t dividend =
+        (__int128_t(data.getHighBits()) << 64) + __int128_t(data.getLowBits());
+    for (int i = 0; i < paddingDigitCount; i++) dividend *= 10;
+    int16_t *ptr = reinterpret_cast<int16_t *>(reader->valBuffer->tail());
+    for (int i = 0; i < (numeric.varlen - NUMERIC_HDRSZ) / sizeof(int16_t);
+         i++) {
+      int16_t remainder = dividend % 10000;
+      *--ptr = remainder;
+      dividend /= 10000;
+    }
+    assert(reinterpret_cast<char *>(ptr) ==
+           reader->valBuffer->tail() - numeric.varlen + NUMERIC_HDRSZ);
+  };
+  reader->valBuffer->clear();
+  dbcommon::transformVector(src.plainSize, src.sel, src.nulls,
+                            convertNumericTranData);
+
+  reader->nulls = srcVector->getNulls();
+  reader->value = reader->valBuffer->data();
+}
+
+static void columnReadGetContent(ORCFormatC *fmt) {
+  const dbcommon::TupleBatchReader &tbReader = fmt->tb->getTupleBatchReader();
+  int32_t colIndex = 0;
+  for (auto plainColIndex : fmt->colToReadIds) {
+    OrcColumnReader *colReader = fmt->columnReaders[colIndex++].get();
+    switch (colReader->type) {
+      case dbcommon::TypeKind::STRINGID:
+      case dbcommon::TypeKind::CHARID:
+      case dbcommon::TypeKind::VARCHARID:
+      case dbcommon::TypeKind::BINARYID: {
+        dbcommon::BytesVector *v = dynamic_cast<dbcommon::BytesVector *>(
+            tbReader[plainColIndex].get());
+        textRelatedGetValueBuffer(fmt, v, colReader);
+        break;
+      }
+      case dbcommon::TypeKind::TIMESTAMPID:
+      case dbcommon::TypeKind::TIMESTAMPTZID: {
+        dbcommon::TimestampVector *v =
+            dynamic_cast<dbcommon::TimestampVector *>(
+                tbReader[plainColIndex].get());
+        timestampGetValueBuffer(fmt, v, colReader);
+        break;
+      }
+      case dbcommon::TypeKind::DECIMALID: {
+        dbcommon::DecimalVector *v = dynamic_cast<dbcommon::DecimalVector *>(
+            tbReader[plainColIndex].get());
+        decimalGetValueBuffer(v, colReader);
+        break;
+      }
+      case dbcommon::TypeKind::BOOLEANID:
+      case dbcommon::TypeKind::SMALLINTID:
+      case dbcommon::TypeKind::INTID:
+      case dbcommon::TypeKind::BIGINTID:
+      case dbcommon::TypeKind::FLOATID:
+      case dbcommon::TypeKind::DOUBLEID:
+      case dbcommon::TypeKind::DATEID:
+      case dbcommon::TypeKind::TIMEID: {
+        dbcommon::Vector *v =
+            dynamic_cast<dbcommon::Vector *>(tbReader[plainColIndex].get());
+        if (v->hasNullValue()) {
+          colReader->nulls = v->getNullBuffer()->getBools();
+        } else {
+          colReader->nulls = nullptr;
+        }
+        colReader->value = v->getValue();
+        break;
+      }
+      default: {
+        LOG_ERROR(ERRCODE_DATA_EXCEPTION, "not supported yet");
+        break;
+      }
+    }
+  }
+}
+
+bool ORCFormatNextORCFormatC(ORCFormatC *fmt, const char **values,
+                             uint64_t *lens, bool *nulls) {
+  try {
+  begin:
+    if (fmt->needNewTupleBatch) {
+      fmt->tb = fmt->orcFormat->next();
+      if (fmt->tb == nullptr) {
+        return false;
+      }
+      fmt->needNewTupleBatch = false;
+      fmt->rowRead = 0;
+      fmt->rowCount = fmt->tb->getNumOfRows();
+      if (fmt->rowCount > 0) columnReadGetContent(fmt);
+    }
+
+    if (fmt->rowRead < fmt->rowCount) {
+      int32_t colIndex = 0;
+      for (auto plainColIndex : fmt->colToReadIds) {
+        OrcColumnReader *reader = fmt->columnReaders[colIndex++].get();
+        switch (reader->type) {
+          case dbcommon::TypeKind::STRINGID:
+          case dbcommon::TypeKind::CHARID:
+          case dbcommon::TypeKind::VARCHARID:
+          case dbcommon::TypeKind::BINARYID: {
+            if (reader->nulls && reader->nulls[fmt->rowRead]) {
+              nulls[plainColIndex] = true;
+            } else {
+              nulls[plainColIndex] = false;
+              values[plainColIndex] = reader->value;
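+              // add 4 bytes for the uint32 length prefix packed in front of
+              // each value by textRelatedGetValueBuffer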
+              lens[plainColIndex] = reader->lens[fmt->rowRead] + 4;
+              reader->value += lens[plainColIndex];
+            }
+            break;
+          }
+          case dbcommon::TypeKind::BOOLEANID: {
+            if (reader->nulls && reader->nulls[fmt->rowRead]) {
+              nulls[plainColIndex] = true;
+            } else {
+              nulls[plainColIndex] = false;
+              values[plainColIndex] = reader->value;
+            }
+            reader->value += 1;
+            break;
+          }
+          case dbcommon::TypeKind::SMALLINTID: {
+            if (reader->nulls && reader->nulls[fmt->rowRead]) {
+              nulls[plainColIndex] = true;
+            } else {
+              nulls[plainColIndex] = false;
+              values[plainColIndex] = reader->value;
+            }
+            reader->value += 2;
+            break;
+          }
+          case dbcommon::TypeKind::INTID:
+          case dbcommon::TypeKind::FLOATID:
+          case dbcommon::TypeKind::DATEID: {
+            if (reader->nulls && reader->nulls[fmt->rowRead]) {
+              nulls[plainColIndex] = true;
+            } else {
+              nulls[plainColIndex] = false;
+              values[plainColIndex] = reader->value;
+            }
+            reader->value += 4;
+            break;
+          }
+          case dbcommon::TypeKind::BIGINTID:
+          case dbcommon::TypeKind::DOUBLEID:
+          case dbcommon::TypeKind::TIMEID: {
+            if (reader->nulls && reader->nulls[fmt->rowRead]) {
+              nulls[plainColIndex] = true;
+            } else {
+              nulls[plainColIndex] = false;
+              values[plainColIndex] = reader->value;
+            }
+            reader->value += 8;
+            break;
+          }
+          case dbcommon::TypeKind::TIMESTAMPID:
+          case dbcommon::TypeKind::TIMESTAMPTZID: {
+            if (reader->nulls && reader->nulls[fmt->rowRead]) {
+              nulls[plainColIndex] = true;
+            } else {
+              nulls[plainColIndex] = false;
+              values[plainColIndex] = reader->value;
+              reader->value += 8;
+            }
+            break;
+          }
+          case dbcommon::TypeKind::DECIMALID: {
+            if (reader->nulls && reader->nulls[fmt->rowRead]) {
+              nulls[plainColIndex] = true;
+            } else {
+              nulls[plainColIndex] = false;
+              values[plainColIndex] = reader->value;
+              lens[plainColIndex] =
+                  (reinterpret_cast<const NumericTransData *>(reader->value))
+                      ->varlen;
+              reader->value += lens[plainColIndex];
+            }
+            break;
+          }
+          default: {
+            LOG_ERROR(ERRCODE_DATA_EXCEPTION, "not supported yet");
+            break;
+          }
+        }
+      }
+      ++fmt->rowRead;
+    } else {
+      fmt->needNewTupleBatch = true;
+      goto begin;
+    }
+    return true;
+  } catch (dbcommon::TransactionAbortException &e) {
+    ORCFormatSetErrorORCFormatC(&(fmt->error), e.errCode(), e.what());
+    return false;
+  }
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/depends/storage/src/storage/cwrapper/orc-format-c.h b/depends/storage/src/storage/cwrapper/orc-format-c.h
new file mode 100644
index 0000000..8b423da
--- /dev/null
+++ b/depends/storage/src/storage/cwrapper/orc-format-c.h
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_CWRAPPER_ORC_FORMAT_C_H_
+#define STORAGE_SRC_STORAGE_CWRAPPER_ORC_FORMAT_C_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef ERROR_MESSAGE_BUFFER_SIZE
+#define ERROR_MESSAGE_BUFFER_SIZE 4096
+#endif
+
+struct ORCFormatC;
+
+typedef struct ORCFormatC ORCFormatC;
+
+typedef struct ORCFormatCatchedError {
+  int errCode;
+  char errMessage[ERROR_MESSAGE_BUFFER_SIZE];
+} ORCFormatCatchedError;
+
+typedef struct ORCFormatFileSplit {
+  char *fileName;
+  int64_t start;
+  int64_t len;
+} ORCFormatFileSplit;
+
+#define ORCFormatType 'o'
+
+// tableOptions in json format
+ORCFormatC *ORCFormatNewORCFormatC(const char *tableOptions, int segno);
+void ORCFormatFreeORCFormatC(ORCFormatC **fmt);
+
+void ORCFormatBeginORCFormatC(ORCFormatC *fmt, ORCFormatFileSplit *splits,
+                              int numSplits, bool *columnsToRead,
+                              char **columnName, int *columnDatatype,
+                              uint64_t *columnDatatypeMod, int numColumns);
+
+bool ORCFormatNextORCFormatC(ORCFormatC *fmt, const char **values,
+                             uint64_t *lens, bool *nulls);
+
+void ORCFormatRescanORCFormatC(ORCFormatC *fmt);
+
+void ORCFormatEndORCFormatC(ORCFormatC *fmt);
+
+void ORCFormatBeginInsertORCFormatC(ORCFormatC *fmt, const char *dirFullPath,
+                                    char **columnName, int *columnDatatype,
+                                    uint64_t *columnDatatypeMod,
+                                    int numColumns);
+void ORCFormatInsertORCFormatC(ORCFormatC *fmt, int *datatypes, char **values,
+                               uint64_t *lens, unsigned char **nullBitmap,
+                               int32_t **dims, bool *isNull);
+void ORCFormatEndInsertORCFormatC(ORCFormatC *fmt);
+
+ORCFormatCatchedError *ORCFormatGetErrorORCFormatC(ORCFormatC *fmt);
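+
+// A minimal read-path sketch (illustrative only; the table options, column
+// metadata, and split arrays below are hypothetical placeholders prepared by
+// the caller):
+//
+//   ORCFormatC *fmt = ORCFormatNewORCFormatC(tableOptionsJson, segno);
+//   ORCFormatBeginORCFormatC(fmt, splits, numSplits, columnsToRead,
+//                            columnName, columnDatatype, columnDatatypeMod,
+//                            numColumns);
+//   while (ORCFormatNextORCFormatC(fmt, values, lens, nulls)) {
+//     /* consume one tuple; values, lens, and nulls are indexed by column */
+//   }
+//   /* inspect ORCFormatGetErrorORCFormatC(fmt)->errCode for failures */
+//   ORCFormatEndORCFormatC(fmt);
+//   ORCFormatFreeORCFormatC(&fmt);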
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // STORAGE_SRC_STORAGE_CWRAPPER_ORC_FORMAT_C_H_
diff --git a/depends/storage/src/storage/format/format.cc b/depends/storage/src/storage/format/format.cc
new file mode 100644
index 0000000..fc2c04e
--- /dev/null
+++ b/depends/storage/src/storage/format/format.cc
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "dbcommon/common/tuple-desc.h"
+#include "dbcommon/filesystem/file-system-manager.h"
+#include "dbcommon/utils/parameters.h"
+#include "dbcommon/utils/url.h"
+
+#include "storage/format/format.h"
+#include "storage/format/orc/orc-format.h"
+
+#include "univplan/univplanbuilder/univplanbuilder-scan-task.h"
+
+namespace storage {
+
+std::unique_ptr<Format> Format::createFormat(
+    univplan::UNIVPLANFORMATTYPE type) {
+  return createFormat(type, nullptr);
+}
+
+std::unique_ptr<Format> Format::createFormat(univplan::UNIVPLANFORMATTYPE type,
+                                             dbcommon::Parameters *p) {
+  std::unique_ptr<Format> format;
+  switch (type) {
+    case univplan::UNIVPLANFORMATTYPE::ORC_FORMAT: {
+      format.reset(new ORCFormat(p));
+      break;
+    }
+    default: {
+      LOG_ERROR(ERRCODE_INVALID_PARAMETER_VALUE, "invalid format %d", type);
+    }
+  }
+  return std::move(format);
+}
+
+//
+// We take the file lengths as input because, once transactions are implemented,
+// we cannot determine the file EOF correctly without this extra information.
+//
+// For now, we simply assign an average share of the total file size
+// (avgLength) to each task; the splits in one task may span several files
+// whose lengths add up to avgLength.
+//
+std::unique_ptr<univplan::UnivPlanScanFileSplitListList> Format::createTasks(
+    const std::vector<std::unique_ptr<Input> > &inputs, int nWorker) {
+  LOG_INFO("createTasks is called");
+
+  assert(nWorker > 0);
+
+  std::unique_ptr<univplan::UnivPlanScanFileSplitListList> taskList(
+      new univplan::UnivPlanScanFileSplitListList);
+  // create one scan task to contain all splits
+  univplan::UnivPlanBuilderScanTask scanTaskBld;
+  for (const std::unique_ptr<Input> &file : inputs) {
+    FileInput *fi = static_cast<FileInput *>(file.get());
+    dbcommon::URL urlParser(fi->getName());
+    dbcommon::FileSystem *fs =
+        fsManager->get(urlParser.getNormalizedServiceName());
+    std::vector<std::unique_ptr<dbcommon::FileBlockLocation> > locations =
+        fs->getFileBlockLocation(urlParser.getPath().c_str(), 0, fi->getSize());
+    for (const std::unique_ptr<dbcommon::FileBlockLocation> &loc : locations) {
+      scanTaskBld.addScanFileSplit(fi->getName().c_str(), loc->offset,
+                                   loc->length, -1, -1);  // no rangeid, rgid
+    }
+  }
+  // build the scan task by transferring the splits tb out of this builder
+  std::unique_ptr<univplan::UnivPlanScanFileSplitListTb> newScanTask(
+      new univplan::UnivPlanScanFileSplitListTb(
+          std::move(scanTaskBld.releaseSplitsTb())));
+
+  // newScanTask->debugOuput();
+
+  taskList->push_back(std::move(newScanTask));
+  return std::move(taskList);
+}
+
+}  // namespace storage
diff --git a/depends/storage/src/storage/format/format.h b/depends/storage/src/storage/format/format.h
new file mode 100644
index 0000000..63559ee
--- /dev/null
+++ b/depends/storage/src/storage/format/format.h
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_FORMAT_H_
+#define STORAGE_SRC_STORAGE_FORMAT_FORMAT_H_
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "dbcommon/common/tuple-batch.h"
+#include "dbcommon/common/tuple-desc.h"
+#include "dbcommon/filesystem/file-system-manager.h"
+#include "dbcommon/filesystem/file-system.h"
+#include "dbcommon/utils/macro.h"
+#include "dbcommon/utils/parameters.h"
+#include "univplan/common/univplan-type.h"
+
+namespace storage {
+
+class FileSystemManagerInterface;
+
+class Input {
+ public:
+  Input() {}
+  virtual ~Input() {}
+
+  // Get input name
+  // @return The input name
+  virtual const std::string &getName() const = 0;
+
+  // Get input size
+  // @return The input size
+  virtual int64_t getSize() const = 0;
+};
+
+class FileInput : public Input {
+ public:
+  FileInput(const char *fileName, int64_t length) {
+    this->fileName = fileName;
+    this->length = length;
+  }
+  FileInput(const std::string &fileName, int64_t length) {
+    this->fileName = fileName;
+    this->length = length;
+  }
+  FileInput(FileInput &&file) {  // NOLINT
+    this->fileName = std::move(file.fileName);
+    this->length = file.length;
+  }
+  FileInput(const FileInput &file) {
+    this->fileName = file.fileName;
+    this->length = file.length;
+  }
+  FileInput &operator=(const FileInput &file) {
+    this->fileName = file.fileName;
+    this->length = file.length;
+    return *this;
+  }
+
+  virtual ~FileInput() {}
+
+  const std::string &getName() const override { return fileName; }
+
+  int64_t getSize() const override { return length; }
+
+ private:
+  std::string fileName;
+  int64_t length;
+};
+
+// This struct was added to make the format interface extensible
+typedef struct FormatContext {
+  univplan::UnivPlanExprPolyList indexExpr;
+} FormatContext;
+
+// Format "read" accepts a list of splits, and return TupleBatches one by one.
+// And Format "write" accept TupleBatches, and write them to storage. It is
+// quite like InputFormat/OutputFormat of MR.
+//
+// It is a general concept, not only about concrete file format.
+// It can be FAST format files on HDFS, Text files on HDFS,
+// even HBase format. So it is extensible.
+// Users should be able to write their only format.
+// For example, users can write a PostgresqlFormat to read and write data
+// to a postgresql server.
+
+class Format {
+ public:
+  Format() {}
+
+  virtual ~Format() {}
+
+  void setFileSystemManager(dbcommon::FileSystemManagerInterface *fsManager) {
+    this->fsManager = fsManager;
+  }
+
+  // Begin scan of the splits
+  // @param splits The file splits that need to be scanned
+  // @param tupleDesc The tuple description for the target table
+  // @param projectionCols The projected columns list
+  // @param filterExpr The filter expression
+  // @param formatContext The extensible format context (e.g. index expressions)
+  // @param readStatsOnly To indicate whether to read only statistics
+  // @return void
+  virtual void beginScan(const univplan::UnivPlanScanFileSplitListList *splits,
+                         const dbcommon::TupleDesc *tupleDesc,
+                         const std::vector<bool> *projectionCols,
+                         const univplan::UnivPlanExprPolyList *filterExpr,
+                         const FormatContext *formatContext,
+                         bool readStatsOnly) = 0;
+
+  // Get next TupleBatch
+  // @return unique_ptr of dbcommon::TupleBatch
+  virtual std::unique_ptr<dbcommon::TupleBatch> next() = 0;
+
+  // End the scan
+  // @return void
+  virtual void endScan() = 0;
+
+  // Restart the scan
+  // @return void
+  virtual void reScan() = 0;
+
+  // Stop the scan
+  // @return void
+  virtual void stopScan() = 0;
+
+  // Begin insert
+  // @param targetName The target name. For 'fast' and 'text' format, it
+  // is the target file name. For 'hbase' format, it is the target table.
+  // @param tupleDesc The tuple description
+  // @return void
+  virtual void beginInsert(const std::string &targetName,
+                           const dbcommon::TupleDesc &tupleDesc) = 0;
+
+  // Insert a tuple batch
+  // @param tb The tuple batch to be inserted
+  virtual void doInsert(std::unique_ptr<dbcommon::TupleBatch> tb) = 0;
+
+  // End insert
+  virtual void endInsert() = 0;
+
+  // Begin update
+  // @param targetName The target name. For 'fast' and 'text' format, it
+  // is the target file name. For 'hbase' format, it is the target table.
+  // @param tupleDesc The tuple description
+  // @return void
+  virtual void beginUpdate(const std::string &targetName,
+                           const dbcommon::TupleDesc &tupleDesc) = 0;
+
+  // Update a tuple batch
+  // @param tb The tuple batch to be updated
+  virtual void doUpdate(std::unique_ptr<dbcommon::TupleBatch> tb) = 0;
+
+  // End update
+  virtual void endUpdate() = 0;
+
+  // Begin delete
+  // @param targetName The target name. For 'fast' and 'text' format, it
+  // is the target file name. For 'hbase' format, it is the target table.
+  // @param tupleDesc The tuple description
+  // @return void
+  virtual void beginDelete(const std::string &targetName,
+                           const dbcommon::TupleDesc &tupleDesc) = 0;
+
+  // Delete a tuple batch
+  // @param tb The tuple batch to be deleted
+  virtual void doDelete(std::unique_ptr<dbcommon::TupleBatch> tb) = 0;
+
+  // End delete
+  virtual void endDelete() = 0;
+
+  // Create tasks given input and the number of workers
+  // @param inputs The input files
+  // @param nWorker The number of workers
+  // @return The list of tasks; each worker has one task assigned.
+  // A task may contain no splits when there are not enough splits
+  // to go around (each task holds a split list).
+  virtual std::unique_ptr<univplan::UnivPlanScanFileSplitListList> createTasks(
+      const std::vector<std::unique_ptr<Input> > &inputs, int nWorker);
+
+  // set & get user command for external table
+  std::string getUserCommand() const { return userCommand; }
+  void setUserCommand(std::string command) { userCommand = command; }
+  virtual void setCancelled() {}
+
+  static std::unique_ptr<Format> createFormat(
+      univplan::UNIVPLANFORMATTYPE type);
+  static std::unique_ptr<Format> createFormat(univplan::UNIVPLANFORMATTYPE type,
+                                              dbcommon::Parameters *p);
+
+  static const int kTuplesPerBatch = DEFAULT_NUMBER_TUPLES_PER_BATCH;
+  static const int kBlockSize = DEFAULT_BLOCK_SIZE;
+
+ protected:
+  // Format does not own splits, so it does not delete it in destructor.
+  const univplan::UnivPlanScanFileSplitListList *splits = nullptr;
+  // The file system manager used to get the file system
+  dbcommon::FileSystemManagerInterface *fsManager = nullptr;
+  // user command for external table such as DBGEN
+  std::string userCommand = "";
+};
+
+}  // namespace storage
+
+#endif  // STORAGE_SRC_STORAGE_FORMAT_FORMAT_H_
diff --git a/depends/storage/src/storage/format/orc/README b/depends/storage/src/storage/format/orc/README
new file mode 100644
index 0000000..2ba5af3
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/README
@@ -0,0 +1,320 @@
+
+1. ORC read process:
+
+FileInputStream -> SnappyDecompressionStream -> RLE decoder -> raw bytes
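+
+A minimal scan sketch of the corresponding C++ API (based on the beginScan/next/endScan
+entry points declared in format.h; the splits, tuple descriptor, projection list, and
+file system manager below are assumed to be prepared by the caller):
+
+  dbcommon::Parameters params;
+  storage::ORCFormat orcFormat(&params);
+  orcFormat.setFileSystemManager(&FSManager);
+  orcFormat.beginScan(&splits, &desc, &columnsToRead, nullptr, nullptr, false);
+  while (std::unique_ptr<dbcommon::TupleBatch> tb = orcFormat.next()) {
+    // consume one tuple batch
+  }
+  orcFormat.endScan();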
+
+2. ORC write process
+
+raw bytes -> RLE encoder -> SnappyCompressionStream -> FileOutputStream 
+
+3. TODOs
+
+1) url formats "hdfs://localhost:8020/user/hive/warehouse/tsmallint/" and "hdfs://localhost:8020//user/hive/warehouse/tsmallint" are not correct
+
+2) some types are supported now: tinyint, smallint, int, bigint, float, double, string, varchar
+more types need support: boolean/date/char(x)/timestamp/decimal/struct et al
+
+To add support for a type, we need to pay special attention to:
+a. TypeImpl::createRowBatch: this function must return the correct ColumnVectorBatch
+b. buildReader -- this function must return a reader of the correct type
+
+3) null value handling performance enhancement
+
+4) writer
+
+  orc::WriterOptions opts;
+  std::unique_ptr<orc::Writer> writer;
+  dbcommon::URL url(filename);
+  dbcommon::FileSystemManager fsm;
+  dbcommon::FileSystem *fs = fsm.get(url.getNormalizedServiceName());
+  writer = orc::createWriter(orc::writeFile(fs, url.getPath()), opts);
+
+  std::unique_ptr<orc::ColumnVectorBatch> batch = writer->createRowBatch(1000);
+  for (uint32_t i = 0; i < 100; i++) {
+    writer->addRowBatch(batch);
+  }
+  writer->close();
+
+5) hasEnoughSpaceForBatch needs to be revisited
+    // estimated tuple batch size
+    // TODO(lei): might need to be revised after we figure out how to
+    // store other types
+
+6) read the footer only once on the master, then dispatch it to the workers. this can potentially
+   avoid all workers opening footers at the same time
+
+7) add some boundary-number tests for the orc format: for example, max(int32_t) for different
+   encoding schemes - delta, direct, patchedbase, short repeat.
+
+8) need to compare the performance & compression ratio for lz4 and snappy
+
+9) write more information to the orc file: statistics, indexes. otherwise,
+   data needs to be reloaded once these features are added to the reader.
+
+10) add tests for snappy (since lz4 is now the default), and add tests for snappycompressor/lz4compressor
+
+4. How to use
+
+-- hive
+create table tcn(t tinyint, s smallint, i int, b bigint, f float, d double, str string, v varchar(10), c char(4), bin binary) stored as orc;
+
+insert into tcn values (1, 2, 3, 4, 1.1, 1.2, 'string', 'var', 'char', 'binary');
+
+select * from tcn;
+
+-- computenode
+create table tcn(t tinyint, s smallint, i int, b bigint, f float, d double, str string, v varchar(10), c string, bin string)
+with (format = orc, location= 'hdfs://localhost:8020/user/hive/warehouse/tcn');
+
+select * from tcn;
+
+5. micro benchmark: based on 2016 Oct 15 version (after analyze)
+
+NOTE: Analyze is very important for aggregation.
+
+                      lz4       fast      snappy   orcnone   postgres
+filesize             426MB      1042MB    402MB    696M      985MB
+load                 20239ms    20784ms   20098ms  19944ms   22767ms(copy)
+count*               32ms       32ms      31ms     30ms      639ms 
+countint             96ms       42ms      98ms     97ms      745ms
+countstring          179ms      85ms      235ms    120ms     1154ms
+count2int2string     444ms      165ms     501ms    365ms     1596ms
+tpch-Q1              1025ms     386ms     1072ms   896ms     3830ms
+
+
+The benchmark used is:
+1) schema
+CREATE TABLE e_LINEITEM ( L_ORDERKEY  int,
+                              L_PARTKEY  int,
+                              L_SUPPKEY  int,
+                              L_LINENUMBER int,
+                              L_QUANTITY      double,
+                              L_EXTENDEDPRICE    double,
+                              L_DISCOUNT      double,
+                              L_TAX    double,
+                              L_RETURNFLAG  string,
+                              L_LINESTATUS  string,
+                              L_SHIPDATE   string,
+                              L_COMMITDATE  string,
+                              L_RECEIPTDATE string,
+                              L_SHIPINSTRUCT string,
+                              L_SHIPMODE  string,
+                              L_COMMENT string)  with (FORMAT = command, COMMANDS = '/Users/ChangLei/curwork/dev/computenode/inst/bin/dbgen -b /Users/ChangLei/curwork/dev/computenode/inst/bin/dists.dss -T L -s 1 -C 2 -S $TASKNO', TaskCount = 2);
+                              
+CREATE TABLE lineitem_orc_lz4 ( L_ORDERKEY  int,
+                        L_PARTKEY  int,
+                        L_SUPPKEY  int,
+                        L_LINENUMBER int,
+                        L_QUANTITY      double,
+                        L_EXTENDEDPRICE double,
+                        L_DISCOUNT      double,
+                        L_TAX    double,
+                        L_RETURNFLAG  string,
+                        L_LINESTATUS  string,
+                        L_SHIPDATE   string,
+                        L_COMMITDATE  string,
+                        L_RECEIPTDATE string,
+                        L_SHIPINSTRUCT string,
+                        L_SHIPMODE  string,
+                        L_COMMENT string) with(format = orc, location='file:///tmp/lineitem_orc_lz4');
+                        
+CREATE TABLE lineitem_fast ( L_ORDERKEY  int,
+                        L_PARTKEY  int,
+                        L_SUPPKEY  int,
+                        L_LINENUMBER int,
+                        L_QUANTITY      double,
+                        L_EXTENDEDPRICE double,
+                        L_DISCOUNT      double,
+                        L_TAX    double,
+                        L_RETURNFLAG  string,
+                        L_LINESTATUS  string,
+                        L_SHIPDATE   string,
+                        L_COMMITDATE  string,
+                        L_RECEIPTDATE string,
+                        L_SHIPINSTRUCT string,
+                        L_SHIPMODE  string,
+                        L_COMMENT string) with(format = fast, location='file:///tmp/lineitem_fast');
+                        
+CREATE TABLE lineitem_orc_snappy ( L_ORDERKEY  int,
+                        L_PARTKEY  int,
+                        L_SUPPKEY  int,
+                        L_LINENUMBER int,
+                        L_QUANTITY      double,
+                        L_EXTENDEDPRICE double,
+                        L_DISCOUNT      double,
+                        L_TAX    double,
+                        L_RETURNFLAG  string,
+                        L_LINESTATUS  string,
+                        L_SHIPDATE   string,
+                        L_COMMITDATE  string,
+                        L_RECEIPTDATE string,
+                        L_SHIPINSTRUCT string,
+                        L_SHIPMODE  string,
+                        L_COMMENT string) with(format = orc, location='file:///tmp/lineitem_orc_snappy');
+                                                
+CREATE TABLE lineitem_orc_none ( L_ORDERKEY  int,
+                        L_PARTKEY  int,
+                        L_SUPPKEY  int,
+                        L_LINENUMBER int,
+                        L_QUANTITY      double,
+                        L_EXTENDEDPRICE double,
+                        L_DISCOUNT      double,
+                        L_TAX    double,
+                        L_RETURNFLAG  string,
+                        L_LINESTATUS  string,
+                        L_SHIPDATE   string,
+                        L_COMMITDATE  string,
+                        L_RECEIPTDATE string,
+                        L_SHIPINSTRUCT string,
+                        L_SHIPMODE  string,
+                        L_COMMENT string) with(format = orc, location='file:///tmp/lineitem_orc_none'); 
+
+CREATE TABLE lineitem_pg ( L_ORDERKEY  int,
+                              L_PARTKEY  int,
+                              L_SUPPKEY  int,
+                              L_LINENUMBER int,
+                              L_QUANTITY      double precision,
+                              L_EXTENDEDPRICE    double precision,
+                              L_DISCOUNT      double precision,
+                              L_TAX    double precision,
+                              L_RETURNFLAG  varchar,
+                              L_LINESTATUS  varchar,
+                              L_SHIPDATE   varchar,
+                              L_COMMITDATE  varchar,
+                              L_RECEIPTDATE varchar,
+                              L_SHIPINSTRUCT varchar,
+                              L_SHIPMODE  varchar,
+                              L_COMMENT varchar);
+                        
+2) loading
+insert into lineitem_orc_lz4 select * from e_lineitem;
+insert into lineitem_fast select * from e_lineitem;
+insert into lineitem_orc_snappy select * from e_lineitem;
+insert into lineitem_orc_none select * from e_lineitem;
+copy lineitem_pg from '/Users/ChangLei/curwork/dev/tpch-dbgen/lineitem.tbl' with delimiter '|';
+
+analyze lineitem_orc_lz4;
+analyze lineitem_fast;
+analyze lineitem_orc_snappy;
+analyze lineitem_orc_none;
+analyze lineitem_pg;
+
+
+3) count*
+select count(*) from lineitem_orc_lz4;
+select count(*) from lineitem_fast;
+select count(*) from lineitem_orc_snappy;
+select count(*) from lineitem_orc_none;
+select count(*) from lineitem_pg;
+
+4) countint
+select count(L_ORDERKEY) from lineitem_orc_lz4;
+select count(L_ORDERKEY) from lineitem_fast;
+select count(L_ORDERKEY) from lineitem_orc_snappy;
+select count(L_ORDERKEY) from lineitem_orc_none;
+select count(L_ORDERKEY) from lineitem_pg;
+
+5) countstring
+select count(L_COMMENT) from lineitem_orc_lz4;
+select count(L_COMMENT) from lineitem_fast;
+select count(L_COMMENT) from lineitem_orc_snappy;
+select count(L_COMMENT) from lineitem_orc_none;
+select count(L_COMMENT) from lineitem_pg;
+
+6) count2int2string
+select count(L_ORDERKEY), count(L_SUPPKEY), count(L_SHIPMODE), count(L_COMMENT) from lineitem_orc_lz4;
+select count(L_ORDERKEY), count(L_SUPPKEY), count(L_SHIPMODE), count(L_COMMENT)  from lineitem_fast;
+select count(L_ORDERKEY), count(L_SUPPKEY), count(L_SHIPMODE), count(L_COMMENT)  from lineitem_orc_snappy;
+select count(L_ORDERKEY), count(L_SUPPKEY), count(L_SHIPMODE), count(L_COMMENT)  from lineitem_orc_none;
+select count(L_ORDERKEY), count(L_SUPPKEY), count(L_SHIPMODE), count(L_COMMENT)  from lineitem_pg;
+
+7) tpch-Q1
+
+SELECT
+    l_returnflag,
+    l_linestatus,
+    sum(l_quantity) as sum_qty,
+    sum(l_extendedprice) as sum_base_price,
+    sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+    sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+    sum(l_quantity) as avg_qty,
+    sum(l_extendedprice) as avg_price,
+    sum(l_discount) as avg_disc,
+    count(*) as count_order
+FROM
+    lineitem_orc_lz4
+GROUP BY
+    l_returnflag,
+    l_linestatus;
+    
+SELECT
+    l_returnflag,
+    l_linestatus,
+    sum(l_quantity) as sum_qty,
+    sum(l_extendedprice) as sum_base_price,
+    sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+    sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+    sum(l_quantity) as avg_qty,
+    sum(l_extendedprice) as avg_price,
+    sum(l_discount) as avg_disc,
+    count(*) as count_order
+FROM
+    lineitem_fast
+GROUP BY
+    l_returnflag,
+    l_linestatus;
+
+
+SELECT
+    l_returnflag,
+    l_linestatus,
+    sum(l_quantity) as sum_qty,
+    sum(l_extendedprice) as sum_base_price,
+    sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+    sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+    sum(l_quantity) as avg_qty,
+    sum(l_extendedprice) as avg_price,
+    sum(l_discount) as avg_disc,
+    count(*) as count_order
+FROM
+    lineitem_orc_snappy
+GROUP BY
+    l_returnflag,
+    l_linestatus;
+
+
+SELECT
+    l_returnflag,
+    l_linestatus,
+    sum(l_quantity) as sum_qty,
+    sum(l_extendedprice) as sum_base_price,
+    sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+    sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+    sum(l_quantity) as avg_qty,
+    sum(l_extendedprice) as avg_price,
+    sum(l_discount) as avg_disc,
+    count(*) as count_order
+FROM
+    lineitem_orc_none
+GROUP BY
+    l_returnflag,
+    l_linestatus;
+    
+SELECT
+    l_returnflag,
+    l_linestatus,
+    sum(l_quantity) as sum_qty,
+    sum(l_extendedprice) as sum_base_price,
+    sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+    sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+    sum(l_quantity) as avg_qty,
+    sum(l_extendedprice) as avg_price,
+    sum(l_discount) as avg_disc,
+    count(*) as count_order
+FROM
+    lineitem_pg
+GROUP BY
+    l_returnflag,
+    l_linestatus;
+        
+        
diff --git a/depends/storage/src/storage/format/orc/byte-rle.cc b/depends/storage/src/storage/format/orc/byte-rle.cc
new file mode 100644
index 0000000..724a43a
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/byte-rle.cc
@@ -0,0 +1,476 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <immintrin.h>
+#include <string.h>
+
+#include <algorithm>
+#include <iostream>
+#include <utility>
+
+#include "storage/format/orc/byte-rle.h"
+#include "storage/format/orc/exceptions.h"
+
+namespace orc {
+
+const size_t MINIMUM_REPEAT = 3;
+
+ByteRleDecoder::~ByteRleDecoder() {
+  // PASS
+}
+
+void ByteRleDecoderImpl::nextBuffer() {
+  int bufferLength = 0;
+  const void *bufferPointer = nullptr;
+  bool result = inputStream->Next(&bufferPointer, &bufferLength);
+  if (!result) {
+    LOG_ERROR(ERRCODE_INTERNAL_ERROR, "bad read in nextBuffer");
+  }
+  bufferStart = static_cast<const char *>(bufferPointer);
+  bufferEnd = bufferStart + bufferLength;
+}
+
+signed char ByteRleDecoderImpl::readByte() {
+  if (bufferStart == bufferEnd) {
+    nextBuffer();
+  }
+  return *(bufferStart++);
+}
+
+void ByteRleDecoderImpl::readHeader() {
+  signed char ch = readByte();
+  if (ch < 0) {
+    remainingValues = static_cast<size_t>(-ch);
+    repeating = false;
+  } else {
+    remainingValues = static_cast<size_t>(ch) + MINIMUM_REPEAT;
+    repeating = true;
+    value = readByte();
+  }
+}
+
+ByteRleDecoderImpl::ByteRleDecoderImpl(
+    std::unique_ptr<SeekableInputStream> input) {
+  inputStream = std::move(input);
+  repeating = false;
+  remainingValues = 0;
+  value = 0;
+  bufferStart = 0;
+  bufferEnd = 0;
+}
+
+ByteRleDecoderImpl::~ByteRleDecoderImpl() {
+  // PASS
+}
+
+void ByteRleDecoderImpl::seek(PositionProvider &location) {
+  // move the input stream
+  inputStream->seek(location);
+  // force a re-read from the stream
+  bufferEnd = bufferStart;
+  // read a new header
+  readHeader();
+  // skip ahead the given number of records
+  ByteRleDecoderImpl::skip(location.next());
+}
+
+void ByteRleDecoderImpl::skip(uint64_t numValues) {
+  while (numValues > 0) {
+    if (remainingValues == 0) {
+      readHeader();
+    }
+    size_t count = std::min(static_cast<size_t>(numValues), remainingValues);
+    remainingValues -= count;
+    numValues -= count;
+    // for literals we need to skip over count bytes, which may involve
+    // reading from the underlying stream
+    if (!repeating) {
+      size_t consumedBytes = count;
+      while (consumedBytes > 0) {
+        if (bufferStart == bufferEnd) {
+          nextBuffer();
+        }
+        size_t skipSize =
+            std::min(static_cast<size_t>(consumedBytes),
+                     static_cast<size_t>(bufferEnd - bufferStart));
+        bufferStart += skipSize;
+        consumedBytes -= skipSize;
+      }
+    }
+  }
+}
+
+void ByteRleDecoderImpl::next(char *data, uint64_t numValues,
+                              const char *notNull) {
+  uint64_t position = 0;
+  // skip over null values
+  while (notNull && position < numValues && !notNull[position]) {
+    position += 1;
+  }
+  while (position < numValues) {
+    // if we are out of values, read more
+    if (remainingValues == 0) {
+      readHeader();
+    }
+    // how many do we read out of this block?
+    size_t count =
+        std::min(static_cast<size_t>(numValues - position), remainingValues);
+    uint64_t consumed = 0;
+    if (repeating) {
+      if (notNull) {
+        for (uint64_t i = 0; i < count; ++i) {
+          if (notNull[position + i]) {
+            data[position + i] = value;
+            consumed += 1;
+          }
+        }
+      } else {
+        memset(data + position, value, count);
+        consumed = count;
+      }
+    } else {
+      if (notNull) {
+        for (uint64_t i = 0; i < count; ++i) {
+          if (notNull[position + i]) {
+            data[position + i] = readByte();
+            consumed += 1;
+          }
+        }
+      } else {
+        uint64_t i = 0;
+        while (i < count) {
+          if (bufferStart == bufferEnd) {
+            nextBuffer();
+          }
+          uint64_t copyBytes =
+              std::min(static_cast<uint64_t>(count - i),
+                       static_cast<uint64_t>(bufferEnd - bufferStart));
+          memcpy(data + position + i, bufferStart, copyBytes);
+          bufferStart += copyBytes;
+          i += copyBytes;
+        }
+        consumed = count;
+      }
+    }
+    remainingValues -= consumed;
+    position += count;
+    // skip over any null values
+    while (notNull && position < numValues && !notNull[position]) {
+      position += 1;
+    }
+  }
+}
+
+std::unique_ptr<ByteRleDecoder> createByteRleDecoder(
+    std::unique_ptr<SeekableInputStream> input) {
+  return std::unique_ptr<ByteRleDecoder>(
+      new ByteRleDecoderImpl(std::move(input)));
+}
+
+class BooleanRleDecoderImpl : public ByteRleDecoderImpl {
+ public:
+  explicit BooleanRleDecoderImpl(std::unique_ptr<SeekableInputStream> input);
+
+  virtual ~BooleanRleDecoderImpl();
+
+  void seek(PositionProvider &) override;
+
+  void skip(uint64_t numValues) override;
+
+  void next(char *data, uint64_t numValues, const char *notNull) override;
+
+ protected:
+  size_t remainingBits = 0;
+  char lastByte = 0;
+};
+
+BooleanRleDecoderImpl::BooleanRleDecoderImpl(
+    std::unique_ptr<SeekableInputStream> input)
+    : ByteRleDecoderImpl(std::move(input)) {}
+
+BooleanRleDecoderImpl::~BooleanRleDecoderImpl() {
+  // PASS
+}
+
+void BooleanRleDecoderImpl::seek(PositionProvider &location) {
+  ByteRleDecoderImpl::seek(location);
+  uint64_t consumed = location.next();
+  if (consumed > 8) {
+    throw ParseError("bad position");
+  }
+  if (consumed != 0) {
+    remainingBits = 8 - consumed;
+    ByteRleDecoderImpl::next(&lastByte, 1, nullptr);
+  }
+}
+
+void BooleanRleDecoderImpl::skip(uint64_t numValues) {
+  if (numValues <= remainingBits) {
+    remainingBits -= numValues;
+  } else {
+    numValues -= remainingBits;
+    uint64_t bytesSkipped = numValues / 8;
+    ByteRleDecoderImpl::skip(bytesSkipped);
+    ByteRleDecoderImpl::next(&lastByte, 1, nullptr);
+    remainingBits = 8 - (numValues % 8);
+  }
+}
+
+void BooleanRleDecoderImpl::next(char *__restrict__ data, uint64_t numValues,
+                                 const char *__restrict__ notNull) {
+  // next spot to fill in
+  uint64_t position = 0;
+
+  // use up any remaining bits
+  if (notNull) {
+    while (remainingBits > 0 && position < numValues) {
+      if (notNull[position]) {
+        remainingBits -= 1;
+        data[position] =
+            (static_cast<unsigned char>(lastByte) >> remainingBits) & 0x1;
+      } else {
+        data[position] = 0;
+      }
+      position += 1;
+    }
+  } else {
+    while (remainingBits > 0 && position < numValues) {
+      remainingBits -= 1;
+      data[position++] =
+          (static_cast<unsigned char>(lastByte) >> remainingBits) & 0x1;
+    }
+  }
+
+  // count the number of nonNulls remaining
+  uint64_t nonNulls = numValues - position;
+  if (notNull) {
+    for (uint64_t i = position; i < numValues; ++i) {
+      if (!notNull[i]) {
+        nonNulls -= 1;
+      }
+    }
+  }
+
+  // fill in the remaining values
+  if (nonNulls == 0) {
+    while (position < numValues) {
+      data[position++] = 0;
+    }
+  } else if (position < numValues) {
+    // read the new bytes into the array
+    uint64_t bytesRead = (nonNulls + 7) / 8;
+    ByteRleDecoderImpl::next(data + position, bytesRead, nullptr);
+    lastByte = data[position + bytesRead - 1];
+    remainingBits = bytesRead * 8 - nonNulls;
+    // expand the array backwards so that we don't clobber the data
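+    // (e.g. 20 non-null values arrive packed into 3 bytes starting at
+    // data[position]; walking from the tail turns each bit into its own
+    // output byte without overwriting packed bytes that still need reading)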
+    uint64_t bitsLeft = bytesRead * 8 - remainingBits;
+    if (notNull) {
+      for (int64_t i = static_cast<int64_t>(numValues) - 1;
+           i >= static_cast<int64_t>(position); --i) {
+        if (notNull[i]) {
+          uint64_t shiftPosn = (-bitsLeft) % 8;
+          data[i] = (data[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1;
+          bitsLeft -= 1;
+        } else {
+          data[i] = 0;
+        }
+      }
+    } else {
+      // performance: edit the code below carefully
+      const char *__restrict__ dataSrc = data;
+      int64_t i = static_cast<int64_t>(numValues) - 1;
+#ifdef AVX_OPT
+      int64_t positionEnd = i - (i - position + 1) % 16;
+      assert((positionEnd - position + 1) % 16 == 0);
+#else
+      int64_t positionEnd = static_cast<int64_t>(position) - 1;
+#endif
+      // step 1: peel off trailing elements so the remainder is aligned to a
+      // 16-byte (128-bit) boundary
+      for (; i > positionEnd;) {
+        uint8_t shiftPosn = (-bitsLeft) % 8;
+        data[i] = (dataSrc[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1;
+        --i, --bitsLeft;
+        if (shiftPosn == 7) break;
+      }
+      for (; i - 7 > positionEnd; i -= 8, bitsLeft -= 8) {
+        char tmpDataSrc = dataSrc[position + (bitsLeft - 1) / 8];
+        uint64_t tmpBuf = 0;
+#ifdef IS_BIG_ENDIAN
+#pragma clang loop unroll(full)
+        for (int8_t shiftPosn = 7; shiftPosn >= 0; shiftPosn--) {
+          tmpBuf <<= 8;
+          tmpBuf |= (char)(tmpDataSrc >> shiftPosn) & 0x1;
+        }
+#else
+#pragma clang loop unroll(full)
+        for (int8_t shiftPosn = 0; shiftPosn <= 7; shiftPosn++) {
+          tmpBuf <<= 8;
+          tmpBuf |= (char)(tmpDataSrc >> shiftPosn) & 0x1;
+        }
+#endif
+        uint64_t *tmpPtr = (uint64_t *)&data[i - 7];
+        *tmpPtr = tmpBuf;
+      }
+// end of step 1
+#ifdef AVX_OPT
+      // step 2: SIMD expansion
+      // Intel CPUs are little endian.
+      // Every 2 source bytes (16 bits) expand into 16 destination bytes (128 bits).
+      // TODO: more specialized versions could be added for AVX2 and AVX-512.
+      __m128i *tmpPtr = (__m128i *)&data[i - 15];
+      if ((uint64_t)tmpPtr % 16 == 0) {
+        // _mm_store_si128 requires a 16-byte aligned address, otherwise it faults
+        __m128i mask = _mm_set1_epi8(0x1);
+        for (; i - 15 >= static_cast<int64_t>(position);
+             i -= 16, bitsLeft -= 16) {
+          const char *tds = &dataSrc[position + (bitsLeft - 1) / 8 - 1];
+          __m128i src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, tds[1], 0, 0, 0, 0, 0,
+                                     0, 0, tds[0]);
+          // high to low in register
+          __m128i res = _mm_set1_epi8(0x0);
+          {
+            __m128i tmp;
+            // pay attention to shift right logically
+            tmp = _mm_slli_si128(src, 0);  // shift the byte
+            tmp = _mm_srli_epi64(tmp, 7);  // shift the bit
+            res = _mm_or_si128(res, tmp);
+
+            tmp = _mm_slli_si128(src, 1);  // shift the byte
+            tmp = _mm_srli_epi64(tmp, 6);  // shift the bit
+            res = _mm_or_si128(res, tmp);
+
+            tmp = _mm_slli_si128(src, 2);  // shift the byte
+            tmp = _mm_srli_epi64(tmp, 5);  // shift the bit
+            res = _mm_or_si128(res, tmp);
+
+            tmp = _mm_slli_si128(src, 3);  // shift the byte
+            tmp = _mm_srli_epi64(tmp, 4);  // shift the bit
+            res = _mm_or_si128(res, tmp);
+
+            tmp = _mm_slli_si128(src, 4);  // shift the byte
+            tmp = _mm_srli_epi64(tmp, 3);  // shift the bit
+            res = _mm_or_si128(res, tmp);
+
+            tmp = _mm_slli_si128(src, 5);  // shift the byte
+            tmp = _mm_srli_epi64(tmp, 2);  // shift the bit
+            res = _mm_or_si128(res, tmp);
+
+            tmp = _mm_slli_si128(src, 6);  // shift the byte
+            tmp = _mm_srli_epi64(tmp, 1);  // shift the bit
+            res = _mm_or_si128(res, tmp);
+
+            tmp = _mm_slli_si128(src, 7);  // shift the byte
+            tmp = _mm_srli_epi64(tmp, 0);  // shift the bit
+            res = _mm_or_si128(res, tmp);
+          }
+          res = _mm_and_si128(res, mask);
+          __m128i *tmpPtr = (__m128i *)&data[i - 15];
+          _mm_storeu_si128(tmpPtr, res);
+        }
+      } else {  // address not aligned
+        int64_t positionEnd = static_cast<int64_t>(position) - 1;
+        for (; i > positionEnd;) {
+          uint8_t shiftPosn = (-bitsLeft) % 8;
+          data[i] = (dataSrc[position + (bitsLeft - 1) / 8] >> shiftPosn) & 0x1;
+          --i, --bitsLeft;
+          if (shiftPosn == 7) break;
+        }
+        for (; i - 7 > positionEnd; i -= 8, bitsLeft -= 8) {
+          char tmpDataSrc = dataSrc[position + (bitsLeft - 1) / 8];
+          uint64_t tmpBuf = 0;
+#pragma clang loop unroll(full)
+          for (int8_t shiftPosn = 0; shiftPosn <= 7; shiftPosn++) {
+            tmpBuf <<= 8;
+            tmpBuf |= (char)(tmpDataSrc >> shiftPosn) & 0x1;
+          }
+          uint64_t *tmpPtr = (uint64_t *)&data[i - 7];
+          *tmpPtr = tmpBuf;
+        }
+      }
+#endif
+      assert(bitsLeft == 0);
+    }
+  }
+}
+
+std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder(
+    std::unique_ptr<SeekableInputStream> input) {
+  BooleanRleDecoderImpl *decoder = new BooleanRleDecoderImpl(std::move(input));
+  return std::unique_ptr<ByteRleDecoder>(
+      reinterpret_cast<ByteRleDecoder *>(decoder));
+}
+
+std::unique_ptr<ByteRleCoder> createByteRleCoder(CompressionKind kind) {
+  std::unique_ptr<ByteRleCoder> coder(
+      new ByteRleCoder(createBlockCompressor(kind)));
+  return std::move(coder);
+}
+
+BooleanRleEncoderImpl::BooleanRleEncoderImpl(
+    std::unique_ptr<SeekableOutputStream> output)
+    : ByteRleCoder(std::move(output)) {
+  bitsRemained = 8;
+  current = static_cast<char>(0);
+}
+
+BooleanRleEncoderImpl::~BooleanRleEncoderImpl() {}
+
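+// Pack booleans into bytes most-significant-bit first: after writing the
+// values {1, 0, 1}, `current` holds 0b10100000 and five bits of it remain
+// unused; each completed byte is handed to the underlying byte RLE coder.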
+void BooleanRleEncoderImpl::write(const char *data, uint64_t numValues,
+                                  const char *notNull) {
+  for (uint64_t i = 0; i < numValues; ++i) {
+    if (bitsRemained == 0) {
+      ByteRleCoder::write(current);
+      current = static_cast<char>(0);
+      bitsRemained = 8;
+    }
+    if (!notNull || notNull[i]) {
+      if (!data || data[i]) {
+        current = static_cast<char>(current | (0x80 >> (8 - bitsRemained)));
+      }
+      --bitsRemained;
+    }
+  }
+  if (bitsRemained == 0) {
+    ByteRleCoder::write(current);
+    current = static_cast<char>(0);
+    bitsRemained = 8;
+  }
+}
+
+void BooleanRleEncoderImpl::flush() {
+  if (bitsRemained != 8) {
+    ByteRleCoder::write(current);
+  }
+  bitsRemained = 8;
+  current = static_cast<char>(0);
+  ByteRleCoder::flush();
+}
+
+void BooleanRleEncoderImpl::flushToStream(OutputStream *stream) {
+  flush();
+  ByteRleCoder::flushToStream(stream);
+}
+
+std::unique_ptr<BooleanRleEncoderImpl> createBooleanRleEncoderImpl(
+    CompressionKind kind) {
+  std::unique_ptr<BooleanRleEncoderImpl> coder(
+      new BooleanRleEncoderImpl(createBlockCompressor(kind)));
+  return std::move(coder);
+}
+}  // namespace orc
diff --git a/depends/storage/src/storage/format/orc/byte-rle.h b/depends/storage/src/storage/format/orc/byte-rle.h
new file mode 100644
index 0000000..719a4a0
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/byte-rle.h
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_BYTE_RLE_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_BYTE_RLE_H_
+
+#include <memory>
+#include <vector>
+
+#include "storage/format/orc/rle.h"
+#include "storage/format/orc/seekable-input-stream.h"
+#include "storage/format/orc/seekable-output-stream.h"
+
+namespace orc {
+
+class ByteRleDecoder {
+ public:
+  virtual ~ByteRleDecoder();
+
+  // Seek to a particular spot.
+  // @param pos The position to seek
+  // @return void
+  virtual void seek(PositionProvider& pos) = 0;  // NOLINT
+
+  // Skip over a given number of values.
+  // @param numValues The number of values to skip
+  // @return void
+  virtual void skip(uint64_t numValues) = 0;
+
+  // Read a number of values into the batch.
+  // @param data the array to read into
+  // @param numValues the number of values to read
+  // @param notNull If the pointer is null, all values are read. If the
+  //    pointer is not null, positions that are false are skipped.
+  virtual void next(char* data, uint64_t numValues, const char* notNull) = 0;
+};
+
+// Create a byte RLE decoder.
+// @param input the input stream to read from
+// @return The decoder
+std::unique_ptr<ByteRleDecoder> createByteRleDecoder(
+    std::unique_ptr<SeekableInputStream> input);
+
+// Create a boolean RLE decoder.
+// Unlike the other RLE decoders, the boolean decoder sets the data to 0
+// if the value is masked by notNull. This is required for the notNull stream
+// processing to properly apply multiple masks from nested types.
+// @param input the input stream to read from
+// @return The boolean RLE decoder
+std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder(
+    std::unique_ptr<SeekableInputStream> input);
+
+class ByteRleDecoderImpl : public ByteRleDecoder {
+ public:
+  explicit ByteRleDecoderImpl(std::unique_ptr<SeekableInputStream> input);
+
+  virtual ~ByteRleDecoderImpl();
+
+  void seek(PositionProvider&) override;
+
+  void skip(uint64_t numValues) override;
+
+  void next(char* data, uint64_t numValues, const char* notNull) override;
+
+ protected:
+  inline void nextBuffer();
+  inline signed char readByte();
+  inline void readHeader();
+
+  std::unique_ptr<SeekableInputStream> inputStream;
+  size_t remainingValues;
+  char value;
+  const char* bufferStart;
+  const char* bufferEnd;
+  bool repeating;
+};
+
+// Run length byte encoder. A control byte is written before
+// each run, with positive values 0 to 127 meaning 3 to 130 repetitions. If the
+// byte is -1 to -128, 1 to 128 literal byte values follow.
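+// For example, the run {7, 7, 7, 7, 7} is emitted as the control byte 0x02
+// (numLiterals - MIN_REPEAT_SIZE = 5 - 3) followed by the single value 0x07,
+// while the literal sequence {1, 2, 3} is emitted as the control byte 0xfd
+// (-3) followed by the three literal bytes.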
+class ByteRleCoder : public RleCoder {
+ public:
+  explicit ByteRleCoder(std::unique_ptr<SeekableOutputStream> stream)
+      : output(std::move(stream)), literals(MAX_LITERAL_SIZE) {}
+  ~ByteRleCoder() {}
+
+  void flushToStream(OutputStream* os) override {
+    writeValues();
+    output->flushToStream(os);
+  }
+
+  void flush() { writeValues(); }
+
+  uint64_t getStreamSize() override { return output->getStreamSize(); }
+
+  void reset() override {
+    output->reset();
+    repeat = false;
+    tailRunLength = 0;
+    numLiterals = 0;
+  }
+
+  uint64_t getEstimatedSpaceNeeded() override {
+    // This is the maximal space used.
+    // It might not be accurate.
+    return output->getEstimatedSpaceNeeded() + sizeof(int8_t) * numLiterals +
+           sizeof(int8_t) /* control byte*/;
+  }
+
+  void write(void* data, uint64_t numValues, const char* notNull) override {
+    int8_t* d = reinterpret_cast<int8_t*>(data);
+
+    if (notNull) {
+      for (uint64_t i = 0; i < numValues; i++) {
+        if (notNull[i]) {
+          write(d[i]);
+        }
+      }
+    } else {
+      for (uint64_t i = 0; i < numValues; i++) {
+        write(d[i]);
+      }
+    }
+  }
+
+  void write(int8_t value) {
+    if (numLiterals == 0) {
+      literals[numLiterals++] = value;
+      tailRunLength = 1;
+    } else if (repeat) {
+      if (value == literals[0]) {
+        numLiterals += 1;
+        if (numLiterals == MAX_REPEAT_SIZE) {
+          writeValues();
+        }
+      } else {
+        writeValues();
+        literals[numLiterals++] = value;
+        tailRunLength = 1;
+      }
+    } else {
+      if (value == literals[numLiterals - 1]) {
+        tailRunLength += 1;
+      } else {
+        tailRunLength = 1;
+      }
+      if (tailRunLength == MIN_REPEAT_SIZE) {
+        if (numLiterals + 1 == MIN_REPEAT_SIZE) {
+          repeat = true;
+          numLiterals += 1;
+        } else {
+          numLiterals -= MIN_REPEAT_SIZE - 1;
+          writeValues();
+          literals[0] = value;
+          repeat = true;
+          numLiterals = MIN_REPEAT_SIZE;
+        }
+      } else {
+        literals[numLiterals++] = value;
+        if (numLiterals == MAX_LITERAL_SIZE) {
+          writeValues();
+        }
+      }
+    }
+  }
+
+ private:
+  void writeValues() {
+    if (numLiterals != 0) {
+      if (repeat) {
+        assert(numLiterals - MIN_REPEAT_SIZE >= 0);
+        output->write<int8_t>(numLiterals - MIN_REPEAT_SIZE);
+        output->write(reinterpret_cast<const char*>(literals.data()),
+                      sizeof(int8_t) * 1);
+      } else {
+        output->write<int8_t>(-numLiterals);
+        output->write(reinterpret_cast<const char*>(literals.data()),
+                      numLiterals * sizeof(int8_t));
+      }
+      repeat = false;
+      tailRunLength = 0;
+      numLiterals = 0;
+    }
+  }
+
+ private:
+  const int32_t MIN_REPEAT_SIZE = 3;
+  const int32_t MAX_LITERAL_SIZE = 128;
+  const int32_t MAX_REPEAT_SIZE = 127 + MIN_REPEAT_SIZE;
+
+  std::unique_ptr<SeekableOutputStream> output;
+  std::vector<int8_t> literals;
+  int32_t numLiterals = 0;
+  bool repeat = false;
+  int32_t tailRunLength = 0;
+};
+
+// Create a byte RLE coder.
+// @param kind  The compression kind used for the coder's output stream
+// @return The coder
+std::unique_ptr<ByteRleCoder> createByteRleCoder(CompressionKind kind);
+
+class BooleanRleEncoderImpl : public ByteRleCoder {
+ public:
+  BooleanRleEncoderImpl(std::unique_ptr<SeekableOutputStream> output);
+  virtual ~BooleanRleEncoderImpl() override;
+
+  virtual void write(const char* data, uint64_t numValues, const char* notNull);
+
+  virtual void flush();
+  virtual void flushToStream(OutputStream* stream) override;
+
+ private:
+  int bitsRemained;
+  char current;
+};
+std::unique_ptr<BooleanRleEncoderImpl> createBooleanRleEncoderImpl(
+    CompressionKind kind);
+
+}  // end of namespace orc
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_BYTE_RLE_H_
diff --git a/depends/storage/src/storage/format/orc/column-printer.cc b/depends/storage/src/storage/format/orc/column-printer.cc
new file mode 100644
index 0000000..896d1db
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/column-printer.cc
@@ -0,0 +1,613 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "storage/format/orc/column-printer.h"
+
+#include <time.h>
+
+#include <limits>
+#include <sstream>
+#include <stdexcept>
+#include <typeinfo>
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wformat-security"
+#endif
+
+namespace orc {
+
+static void writeChar(std::string& file, char ch) {  // NOLINT
+  file += ch;
+}
+
+void writeString(std::string& file, const char* ptr) {  // NOLINT
+  size_t len = strlen(ptr);
+  file.append(ptr, len);
+}
+
+ColumnPrinter::ColumnPrinter(std::string& _buffer)
+    :  // NOLINT
+      buffer(_buffer) {
+  notNull = nullptr;
+  hasNulls = false;
+}
+
+ColumnPrinter::~ColumnPrinter() {
+  // PASS
+}
+
+void ColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  hasNulls = batch.hasNulls;
+  if (hasNulls) {
+    notNull = batch.notNull.data();
+  } else {
+    notNull = nullptr;
+  }
+}
+
+std::unique_ptr<ColumnPrinter> createColumnPrinter(
+    std::string& buffer,  // NOLINT
+    const Type* type) {
+  ColumnPrinter* result = nullptr;
+  if (type == nullptr) {
+    result = new VoidColumnPrinter(buffer);
+  } else {
+    switch (static_cast<int64_t>(type->getKind())) {
+      case BOOLEAN:
+        result = new BooleanColumnPrinter(buffer);
+        break;
+
+      case SHORT:
+        result = new ShortColumnPrinter(buffer);
+        break;
+
+      case INT:
+        result = new IntColumnPrinter(buffer);
+        break;
+
+      case BYTE:
+      case LONG:
+        result = new LongColumnPrinter(buffer);
+        break;
+
+      case FLOAT:
+        result = new FloatColumnPrinter(buffer, *type);
+        break;
+
+      case DOUBLE:
+        result = new DoubleColumnPrinter(buffer, *type);
+        break;
+
+      case STRING:
+      case VARCHAR:
+      case CHAR:
+        result = new StringColumnPrinter(buffer);
+        break;
+
+      case BINARY:
+        result = new BinaryColumnPrinter(buffer);
+        break;
+
+      case TIMESTAMP:
+        result = new TimestampColumnPrinter(buffer);
+        break;
+
+      case LIST:
+        result = new ListColumnPrinter(buffer, *type);
+        break;
+
+      case MAP:
+        result = new MapColumnPrinter(buffer, *type);
+        break;
+
+      case STRUCT:
+        result = new StructColumnPrinter(buffer, *type);
+        break;
+
+      case DECIMAL:
+        if (type->getPrecision() == 0 || type->getPrecision() > 18) {
+          result = new Decimal128ColumnPrinter(buffer);
+        } else {
+          result = new Decimal64ColumnPrinter(buffer);
+        }
+        break;
+
+      case DATE:
+        result = new DateColumnPrinter(buffer);
+        break;
+
+      case TIME:
+        result = new TimeColumnPrinter(buffer);
+        break;
+
+      case UNION:
+        result = new UnionColumnPrinter(buffer, *type);
+        break;
+
+      default:
+        LOG_ERROR(ERRCODE_INTERNAL_ERROR, "unknown batch type");
+    }
+  }
+  return std::unique_ptr<ColumnPrinter>(result);
+}
+
+VoidColumnPrinter::VoidColumnPrinter(std::string& buffer)
+    : ColumnPrinter(buffer) {
+  // PASS
+}
+
+void VoidColumnPrinter::reset(const ColumnVectorBatch&) {
+  // PASS
+}
+
+void VoidColumnPrinter::printRow(uint64_t) { writeString(buffer, "null"); }
+
+FloatColumnPrinter::FloatColumnPrinter(std::string& buffer, const Type& type)
+    :  // NOLINT
+      ColumnPrinter(buffer),
+      data(nullptr) {
+  // PASS
+}
+
+void FloatColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  data = dynamic_cast<const FloatVectorBatch&>(batch).data.data();
+}
+
+void FloatColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    char numBuffer[64];
+    snprintf(numBuffer, sizeof(numBuffer), "%.7g", data[rowId]);
+    writeString(buffer, numBuffer);
+  }
+}
+
+DoubleColumnPrinter::DoubleColumnPrinter(std::string& buffer, const Type& type)
+    :  // NOLINT
+      ColumnPrinter(buffer),
+      data(nullptr) {
+  // PASS
+}
+
+void DoubleColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  data = dynamic_cast<const DoubleVectorBatch&>(batch).data.data();
+}
+
+void DoubleColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    char numBuffer[64];
+    snprintf(numBuffer, sizeof(numBuffer), "%.14g", data[rowId]);
+    writeString(buffer, numBuffer);
+  }
+}
+
+Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& buffer)
+    : ColumnPrinter(buffer), data(nullptr), scale(0) {
+  // PASS
+}
+
+void Decimal64ColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  data = dynamic_cast<const Decimal64VectorBatch&>(batch).values.data();
+  scale = dynamic_cast<const Decimal64VectorBatch&>(batch).scale;
+}
+
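+// Render an unscaled decimal value as a string, placing the decimal point
+// `scale` digits from the right, e.g. toDecimalString(12345, 2) == "123.45"
+// and toDecimalString(-7, 3) == "-0.007".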
+std::string toDecimalString(int64_t value, int32_t scale) {
+  std::stringstream buffer;
+  if (scale == 0) {
+    buffer << value;
+    return buffer.str();
+  }
+  std::string sign = "";
+  if (value < 0) {
+    sign = "-";
+    value = -value;
+  }
+  buffer << value;
+  std::string str = buffer.str();
+  int32_t len = static_cast<int32_t>(str.length());
+  if (len > scale) {
+    return sign + str.substr(0, static_cast<size_t>(len - scale)) + "." +
+           str.substr(static_cast<size_t>(len - scale),
+                      static_cast<size_t>(scale));
+  } else if (len == scale) {
+    return sign + "0." + str;
+  } else {
+    std::string result = sign + "0.";
+    for (int32_t i = 0; i < scale - len; ++i) {
+      result += "0";
+    }
+    return result + str;
+  }
+}
+
+void Decimal64ColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    writeString(buffer, toDecimalString(data[rowId], scale).c_str());
+  }
+}
+
+Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& buffer)
+    : ColumnPrinter(buffer), data(nullptr), scale(0) {
+  // PASS
+}
+
+void Decimal128ColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  data = dynamic_cast<const Decimal128VectorBatch&>(batch).values.data();
+  scale = dynamic_cast<const Decimal128VectorBatch&>(batch).scale;
+}
+
+void Decimal128ColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    writeString(buffer, data[rowId].toDecimalString(scale).c_str());
+  }
+}
+
+StringColumnPrinter::StringColumnPrinter(std::string& buffer)
+    : ColumnPrinter(buffer), start(nullptr), length(nullptr) {
+  // PASS
+}
+
+void StringColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  start = dynamic_cast<const BytesVectorBatch&>(batch).data.data();
+  length = dynamic_cast<const BytesVectorBatch&>(batch).length.data();
+}
+
+void StringColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    writeChar(buffer, '"');
+    for (int64_t i = 0; i < length[rowId]; ++i) {
+      char ch = static_cast<char>(start[rowId][i]);
+      switch (ch) {
+        case '\\':
+          writeString(buffer, "\\\\");
+          break;
+        case '\b':
+          writeString(buffer, "\\b");
+          break;
+        case '\f':
+          writeString(buffer, "\\f");
+          break;
+        case '\n':
+          writeString(buffer, "\\n");
+          break;
+        case '\r':
+          writeString(buffer, "\\r");
+          break;
+        case '\t':
+          writeString(buffer, "\\t");
+          break;
+        case '"':
+          writeString(buffer, "\\\"");
+          break;
+        default:
+          writeChar(buffer, ch);
+          break;
+      }
+    }
+    writeChar(buffer, '"');
+  }
+}
+
+ListColumnPrinter::ListColumnPrinter(std::string& buffer, const Type& type)
+    : ColumnPrinter(buffer), offsets(nullptr) {
+  elementPrinter = createColumnPrinter(buffer, type.getSubtype(0));
+}
+
+void ListColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  offsets = dynamic_cast<const ListVectorBatch&>(batch).offsets.data();
+  elementPrinter->reset(*dynamic_cast<const ListVectorBatch&>(batch).elements);
+}
+
+void ListColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    writeChar(buffer, '[');
+    for (int64_t i = offsets[rowId]; i < offsets[rowId + 1]; ++i) {
+      if (i != offsets[rowId]) {
+        writeString(buffer, ", ");
+      }
+      elementPrinter->printRow(static_cast<uint64_t>(i));
+    }
+    writeChar(buffer, ']');
+  }
+}
+
+MapColumnPrinter::MapColumnPrinter(std::string& buffer, const Type& type)
+    : ColumnPrinter(buffer), offsets(nullptr) {
+  keyPrinter = createColumnPrinter(buffer, type.getSubtype(0));
+  elementPrinter = createColumnPrinter(buffer, type.getSubtype(1));
+}
+
+void MapColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  const MapVectorBatch& myBatch = dynamic_cast<const MapVectorBatch&>(batch);
+  offsets = myBatch.offsets.data();
+  keyPrinter->reset(*myBatch.keys);
+  elementPrinter->reset(*myBatch.elements);
+}
+
+void MapColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    writeChar(buffer, '[');
+    for (int64_t i = offsets[rowId]; i < offsets[rowId + 1]; ++i) {
+      if (i != offsets[rowId]) {
+        writeString(buffer, ", ");
+      }
+      writeString(buffer, "{\"key\": ");
+      keyPrinter->printRow(static_cast<uint64_t>(i));
+      writeString(buffer, ", \"value\": ");
+      elementPrinter->printRow(static_cast<uint64_t>(i));
+      writeChar(buffer, '}');
+    }
+    writeChar(buffer, ']');
+  }
+}
+
+UnionColumnPrinter::UnionColumnPrinter(std::string& buffer, const Type& type)
+    : ColumnPrinter(buffer), tags(nullptr), offsets(nullptr) {
+  for (unsigned int i = 0; i < type.getSubtypeCount(); ++i) {
+    fieldPrinter.push_back(
+        createColumnPrinter(buffer, type.getSubtype(i)).release());
+  }
+}
+
+UnionColumnPrinter::~UnionColumnPrinter() {
+  for (size_t i = 0; i < fieldPrinter.size(); i++) {
+    delete fieldPrinter[i];
+  }
+}
+
+void UnionColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  const UnionVectorBatch& unionBatch =
+      dynamic_cast<const UnionVectorBatch&>(batch);
+  tags = unionBatch.tags.data();
+  offsets = unionBatch.offsets.data();
+  for (size_t i = 0; i < fieldPrinter.size(); ++i) {
+    fieldPrinter[i]->reset(*(unionBatch.children[i]));
+  }
+}
+
+void UnionColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    writeString(buffer, "{\"tag\": ");
+    char numBuffer[64];
+    snprintf(numBuffer, sizeof(numBuffer),
+             "%"
+             "ll"
+             "d",
+             static_cast<int64_t>(tags[rowId]));
+    writeString(buffer, numBuffer);
+    writeString(buffer, ", \"value\": ");
+    fieldPrinter[tags[rowId]]->printRow(offsets[rowId]);
+    writeChar(buffer, '}');
+  }
+}
+
+StructColumnPrinter::StructColumnPrinter(std::string& buffer, const Type& type)
+    :  // NOLINT
+      ColumnPrinter(buffer) {
+  for (unsigned int i = 0; i < type.getSubtypeCount(); ++i) {
+    fieldNames.push_back(type.getFieldName(i));
+    fieldTypes.push_back(type.getSubtype(i)->toString());
+    fieldPrinter.push_back(
+        createColumnPrinter(buffer, type.getSubtype(i)).release());
+  }
+}
+
+StructColumnPrinter::~StructColumnPrinter() {
+  for (size_t i = 0; i < fieldPrinter.size(); i++) {
+    delete fieldPrinter[i];
+  }
+}
+
+void StructColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  const StructVectorBatch& structBatch =
+      dynamic_cast<const StructVectorBatch&>(batch);
+  for (size_t i = 0; i < fieldPrinter.size(); ++i) {
+    fieldPrinter[i]->reset(*(structBatch.fields[i]));
+  }
+}
+
+void StructColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    writeChar(buffer, '{');
+    for (unsigned int i = 0; i < fieldPrinter.size(); ++i) {
+      if (i != 0) {
+        writeString(buffer, ", ");
+      }
+      writeChar(buffer, '"');
+      writeString(buffer, fieldNames[i].c_str());
+      writeChar(buffer, '-');
+      writeString(buffer, fieldTypes[i].c_str());
+      writeString(buffer, "\": ");
+      fieldPrinter[i]->printRow(rowId);
+    }
+    writeChar(buffer, '}');
+  }
+}
+
+DateColumnPrinter::DateColumnPrinter(std::string& buffer)
+    : ColumnPrinter(buffer), data(nullptr) {
+  // PASS
+}
+
+void DateColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    const time_t timeValue = data[rowId] * 24 * 60 * 60;
+    struct tm tmValue;
+    gmtime_r(&timeValue, &tmValue);
+    char timeBuffer[11];
+    strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d", &tmValue);
+    writeChar(buffer, '"');
+    writeString(buffer, timeBuffer);
+    writeChar(buffer, '"');
+  }
+}
+
+void DateColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  data = dynamic_cast<const LongVectorBatch&>(batch).data.data();
+}
+
+TimeColumnPrinter::TimeColumnPrinter(std::string& buffer)
+    : ColumnPrinter(buffer), data(nullptr) {
+  // PASS
+}
+
+void TimeColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    const time_t timeValue = data[rowId] * 24 * 60 * 60;
+    struct tm tmValue;
+    gmtime_r(&timeValue, &tmValue);
+    char timeBuffer[11];
+    strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d", &tmValue);
+    writeChar(buffer, '"');
+    writeString(buffer, timeBuffer);
+    writeChar(buffer, '"');
+  }
+}
+
+void TimeColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  data = dynamic_cast<const LongVectorBatch&>(batch).data.data();
+}
+
+BooleanColumnPrinter::BooleanColumnPrinter(std::string& buffer)
+    : ColumnPrinter(buffer), data(nullptr) {
+  // PASS
+}
+
+void BooleanColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    writeString(buffer, (data[rowId] ? "true" : "false"));
+  }
+}
+
+void BooleanColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  data = dynamic_cast<const LongVectorBatch&>(batch).data.data();
+}
+
+BinaryColumnPrinter::BinaryColumnPrinter(std::string& buffer)
+    : ColumnPrinter(buffer), start(nullptr), length(nullptr) {
+  // PASS
+}
+
+void BinaryColumnPrinter::printRow(uint64_t rowId) {
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    writeChar(buffer, '[');
+    for (int64_t i = 0; i < length[rowId]; ++i) {
+      if (i != 0) {
+        writeString(buffer, ", ");
+      }
+      char numBuffer[64];
+      snprintf(numBuffer, sizeof(numBuffer), "%d",
+               (static_cast<const int>(start[rowId][i]) & 0xff));
+      writeString(buffer, numBuffer);
+    }
+    writeChar(buffer, ']');
+  }
+}
+
+void BinaryColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  start = dynamic_cast<const BytesVectorBatch&>(batch).data.data();
+  length = dynamic_cast<const BytesVectorBatch&>(batch).length.data();
+}
+
+TimestampColumnPrinter::TimestampColumnPrinter(std::string& buffer)
+    : ColumnPrinter(buffer), seconds(nullptr), nanoseconds(nullptr) {
+  // PASS
+}
+
+void TimestampColumnPrinter::printRow(uint64_t rowId) {
+  const int64_t NANO_DIGITS = 9;
+  if (hasNulls && !notNull[rowId]) {
+    writeString(buffer, "null");
+  } else {
+    int64_t nanos = nanoseconds[rowId];
+    time_t secs = static_cast<time_t>(seconds[rowId]);
+    struct tm tmValue;
+    gmtime_r(&secs, &tmValue);
+    char timeBuffer[20];
+    strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
+    writeChar(buffer, '"');
+    writeString(buffer, timeBuffer);
+    writeChar(buffer, '.');
+    // remove trailing zeros off the back of the nanos value.
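+    // e.g. nanos = 123000000 prints as ".123" rather than ".123000000"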
+    int64_t zeroDigits = 0;
+    if (nanos == 0) {
+      zeroDigits = 8;
+    } else {
+      while (nanos % 10 == 0) {
+        nanos /= 10;
+        zeroDigits += 1;
+      }
+    }
+    char numBuffer[64];
+    snprintf(numBuffer, sizeof(numBuffer),
+             "%0*"
+             "ll"
+             "d\"",
+             static_cast<int>(NANO_DIGITS - zeroDigits),
+             static_cast<int64_t>(nanos));
+    writeString(buffer, numBuffer);
+  }
+}
+
+void TimestampColumnPrinter::reset(const ColumnVectorBatch& batch) {
+  ColumnPrinter::reset(batch);
+  const TimestampVectorBatch& ts =
+      dynamic_cast<const TimestampVectorBatch&>(batch);
+  seconds = ts.data.data();
+  nanoseconds = ts.nanoseconds.data();
+}
+}  // namespace orc
diff --git a/depends/storage/src/storage/format/orc/column-printer.h b/depends/storage/src/storage/format/orc/column-printer.h
new file mode 100644
index 0000000..01b31cd
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/column-printer.h
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_COLUMN_PRINTER_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_COLUMN_PRINTER_H_
+
+#include <stdio.h>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "storage/format/orc/input-stream.h"
+#include "storage/format/orc/type.h"
+#include "storage/format/orc/vector.h"
+
+namespace orc {
+
+extern void writeString(std::string& file, const char* ptr);  // NOLINT
+
+class ColumnPrinter {
+ protected:
+  std::string& buffer;
+  bool hasNulls;
+  const char* notNull;
+
+ public:
+  explicit ColumnPrinter(std::string&);
+  virtual ~ColumnPrinter();
+  virtual void printRow(uint64_t rowId) = 0;
+  // should be called once at the start of each batch of rows
+  virtual void reset(const ColumnVectorBatch& batch);
+};
+
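+// createColumnPrinter builds a printer matching the given type. A typical
+// (illustrative) loop over a decoded batch looks like:
+//   auto printer = createColumnPrinter(line, rootType);  // line: std::string
+//   printer->reset(batch);         // once per ColumnVectorBatch
+//   for (uint64_t r = 0; r < rowsInBatch; ++r) {
+//     line.clear();
+//     printer->printRow(r);        // appends one row's text to `line`
+//   }
+// rootType, batch, and rowsInBatch are placeholder names in this sketch.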
+std::unique_ptr<ColumnPrinter> createColumnPrinter(std::string&,
+                                                   const Type* type);
+
+class VoidColumnPrinter : public ColumnPrinter {
+ public:
+  explicit VoidColumnPrinter(std::string&);
+  ~VoidColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class BooleanColumnPrinter : public ColumnPrinter {
+ private:
+  const int64_t* data;
+
+ public:
+  explicit BooleanColumnPrinter(std::string&);
+  ~BooleanColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+template <class ElementType>
+class FixedSizeColumnPrinter : public ColumnPrinter {
+ private:
+  const ElementType* data;
+
+ public:
+  explicit FixedSizeColumnPrinter(std::string& buffer)
+      :  // NOLINT
+        ColumnPrinter(buffer),
+        data(nullptr) {}
+  ~FixedSizeColumnPrinter() {}
+
+  void reset(const ColumnVectorBatch& batch) override {
+    ColumnPrinter::reset(batch);
+    data = reinterpret_cast<const ElementType*>(batch.getData());
+  }
+
+  void printRow(uint64_t rowId) override {
+    if (hasNulls && !notNull[rowId]) {
+      writeString(buffer, "null");
+    } else {
+      std::stringstream ss;
+      ss << data[rowId];
+      writeString(buffer, ss.str().c_str());
+    }
+  }
+};
+
+class LongColumnPrinter : public FixedSizeColumnPrinter<int64_t> {
+ public:
+  explicit LongColumnPrinter(std::string& buffer)
+      :  // NOLINT
+        FixedSizeColumnPrinter<int64_t>(buffer) {}
+  ~LongColumnPrinter() {}
+};
+
+class IntColumnPrinter : public FixedSizeColumnPrinter<int32_t> {
+ public:
+  explicit IntColumnPrinter(std::string& buffer)
+      :  // NOLINT
+        FixedSizeColumnPrinter<int32_t>(buffer) {}
+  ~IntColumnPrinter() {}
+};
+
+class ShortColumnPrinter : public FixedSizeColumnPrinter<int16_t> {
+ public:
+  explicit ShortColumnPrinter(std::string& buffer)
+      :  // NOLINT
+        FixedSizeColumnPrinter<int16_t>(buffer) {}
+  ~ShortColumnPrinter() {}
+};
+
+class FloatColumnPrinter : public ColumnPrinter {
+ private:
+  const float* data;
+
+ public:
+  explicit FloatColumnPrinter(std::string&, const Type& type);
+  virtual ~FloatColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class DoubleColumnPrinter : public ColumnPrinter {
+ private:
+  const double* data;
+
+ public:
+  explicit DoubleColumnPrinter(std::string&, const Type& type);
+  virtual ~DoubleColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class TimestampColumnPrinter : public ColumnPrinter {
+ private:
+  const int64_t* seconds;
+  const int64_t* nanoseconds;
+
+ public:
+  explicit TimestampColumnPrinter(std::string&);
+  ~TimestampColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class DateColumnPrinter : public ColumnPrinter {
+ private:
+  const int64_t* data;
+
+ public:
+  explicit DateColumnPrinter(std::string&);
+  ~DateColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class TimeColumnPrinter : public ColumnPrinter {
+ private:
+  const int64_t* data;
+
+ public:
+  explicit TimeColumnPrinter(std::string&);
+  ~TimeColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class Decimal64ColumnPrinter : public ColumnPrinter {
+ private:
+  const int64_t* data;
+  int32_t scale;
+
+ public:
+  explicit Decimal64ColumnPrinter(std::string&);
+  ~Decimal64ColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class Decimal128ColumnPrinter : public ColumnPrinter {
+ private:
+  const Int128* data;
+  int32_t scale;
+
+ public:
+  explicit Decimal128ColumnPrinter(std::string&);
+  ~Decimal128ColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class StringColumnPrinter : public ColumnPrinter {
+ private:
+  const char* const* start;
+  const int64_t* length;
+
+ public:
+  explicit StringColumnPrinter(std::string&);
+  virtual ~StringColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class BinaryColumnPrinter : public ColumnPrinter {
+ private:
+  const char* const* start;
+  const int64_t* length;
+
+ public:
+  explicit BinaryColumnPrinter(std::string&);
+  virtual ~BinaryColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class ListColumnPrinter : public ColumnPrinter {
+ private:
+  const int64_t* offsets;
+  std::unique_ptr<ColumnPrinter> elementPrinter;
+
+ public:
+  ListColumnPrinter(std::string&, const Type& type);
+  virtual ~ListColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class MapColumnPrinter : public ColumnPrinter {
+ private:
+  const int64_t* offsets;
+  std::unique_ptr<ColumnPrinter> keyPrinter;
+  std::unique_ptr<ColumnPrinter> elementPrinter;
+
+ public:
+  MapColumnPrinter(std::string&, const Type& type);
+  virtual ~MapColumnPrinter() {}
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class UnionColumnPrinter : public ColumnPrinter {
+ private:
+  const unsigned char* tags;
+  const uint64_t* offsets;
+  std::vector<ColumnPrinter*> fieldPrinter;
+
+ public:
+  UnionColumnPrinter(std::string&, const Type& type);
+  virtual ~UnionColumnPrinter();
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+class StructColumnPrinter : public ColumnPrinter {
+ private:
+  std::vector<ColumnPrinter*> fieldPrinter;
+  std::vector<std::string> fieldNames;
+  std::vector<std::string> fieldTypes;
+
+ public:
+  StructColumnPrinter(std::string&, const Type& type);
+  virtual ~StructColumnPrinter();
+  void printRow(uint64_t rowId) override;
+  void reset(const ColumnVectorBatch& batch) override;
+};
+
+}  // namespace orc
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_COLUMN_PRINTER_H_
diff --git a/depends/storage/src/storage/format/orc/data-buffer.cc b/depends/storage/src/storage/format/orc/data-buffer.cc
new file mode 100644
index 0000000..5018e02
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/data-buffer.cc
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "storage/format/orc/data-buffer.h"
+
+#include "dbcommon/utils/cutils.h"
+#include "dbcommon/utils/global.h"
+
+#include "storage/format/orc/int128.h"
+
+namespace orc {
+
+template <class T>
+DataBuffer<T>::DataBuffer(dbcommon::MemoryPool& pool,  // NOLINT
+                          uint64_t newSize)
+    : memoryPool(pool), buf(nullptr), currentSize(0), currentCapacity(0) {
+  if (newSize) resize(newSize);
+}
+
+template <class T>
+DataBuffer<T>::~DataBuffer() {
+  if (buf) memoryPool.free(buf);
+}
+
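+// Note on resize() below: it (re)allocates through the shared
+// dbcommon::MemoryPool, and the capacity is then recomputed from
+// memoryPool.getSpace() / sizeof(T) rather than from newSize.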
+template <class T>
+void DataBuffer<T>::resize(uint64_t newSize) {
+  if (buf) {
+    buf = memoryPool.realloc<T>(buf, sizeof(T) * newSize);
+  } else {
+    buf = memoryPool.malloc<T>(sizeof(T) * newSize);
+  }
+  currentCapacity = memoryPool.getSpace() / sizeof(T);
+  currentSize = newSize;
+}
+
+template class DataBuffer<bool>;
+template class DataBuffer<char>;
+template class DataBuffer<char*>;
+template class DataBuffer<float>;
+template class DataBuffer<double>;
+template class DataBuffer<Int128>;
+template class DataBuffer<int64_t>;
+template class DataBuffer<uint64_t>;
+template class DataBuffer<int32_t>;
+template class DataBuffer<uint32_t>;
+template class DataBuffer<int16_t>;
+template class DataBuffer<uint16_t>;
+template class DataBuffer<int8_t>;
+template class DataBuffer<uint8_t>;
+
+}  // namespace orc
diff --git a/depends/storage/src/storage/format/orc/data-buffer.h b/depends/storage/src/storage/format/orc/data-buffer.h
new file mode 100644
index 0000000..d32989d
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/data-buffer.h
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_DATA_BUFFER_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_DATA_BUFFER_H_
+
+#include <memory>
+
+#include "dbcommon/utils/memory-pool.h"
+
+namespace orc {
+
+template <class T>
+class DataBuffer {
+ private:
+  dbcommon::MemoryPool& memoryPool;
+  T* buf;
+  // current size
+  uint64_t currentSize;
+  // maximal capacity (actual allocated memory)
+  uint64_t currentCapacity;
+
+  // not implemented
+  DataBuffer(DataBuffer& buffer);             // NOLINT
+  DataBuffer& operator=(DataBuffer& buffer);  // NOLINT
+
+ public:
+  explicit DataBuffer(dbcommon::MemoryPool& pool,  // NOLINT
+                      uint64_t _size = 0);
+  virtual ~DataBuffer();
+
+  T* data() { return buf; }
+
+  const T* data() const { return buf; }
+
+  uint64_t size() { return currentSize; }
+
+  uint64_t capacity() { return currentCapacity; }
+
+  T& operator[](uint64_t i) { return buf[i]; }
+
+  void resize(uint64_t _size);
+};
+
+}  // namespace orc
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_DATA_BUFFER_H_
diff --git a/depends/storage/src/storage/format/orc/exceptions.cc b/depends/storage/src/storage/format/orc/exceptions.cc
new file mode 100644
index 0000000..6d1ecc5
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/exceptions.cc
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "storage/format/orc/exceptions.h"
+
+namespace orc {
+
+NotImplementedYet::NotImplementedYet(const std::string& what_arg)
+    : logic_error(what_arg) {
+  // PASS
+}
+
+NotImplementedYet::NotImplementedYet(const char* what_arg)
+    : logic_error(what_arg) {
+  // PASS
+}
+
+NotImplementedYet::NotImplementedYet(const NotImplementedYet& error)
+    : logic_error(error) {
+  // PASS
+}
+
+NotImplementedYet::~NotImplementedYet() noexcept {
+  // PASS
+}
+
+ParseError::ParseError(const std::string& what_arg) : runtime_error(what_arg) {
+  // PASS
+}
+
+ParseError::ParseError(const char* what_arg) : runtime_error(what_arg) {
+  // PASS
+}
+
+ParseError::ParseError(const ParseError& error) : runtime_error(error) {
+  // PASS
+}
+
+ParseError::~ParseError() noexcept {
+  // PASS
+}
+}  // namespace orc
diff --git a/depends/storage/src/storage/format/orc/exceptions.h b/depends/storage/src/storage/format/orc/exceptions.h
new file mode 100644
index 0000000..98beecf
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/exceptions.h
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_EXCEPTIONS_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_EXCEPTIONS_H_
+
+#include <stdexcept>
+#include <string>
+
+namespace orc {
+
+class NotImplementedYet : public std::logic_error {
+ public:
+  explicit NotImplementedYet(const std::string& what_arg);
+  explicit NotImplementedYet(const char* what_arg);
+  virtual ~NotImplementedYet() noexcept;
+  NotImplementedYet(const NotImplementedYet&);
+
+ private:
+  NotImplementedYet& operator=(const NotImplementedYet&);
+};
+
+class ParseError : public std::runtime_error {
+ public:
+  explicit ParseError(const std::string& what_arg);
+  explicit ParseError(const char* what_arg);
+  virtual ~ParseError() noexcept;
+  ParseError(const ParseError&);
+
+ private:
+  ParseError& operator=(const ParseError&);
+};
+}  // namespace orc
+
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_EXCEPTIONS_H_
diff --git a/depends/storage/src/storage/format/orc/file-version.h b/depends/storage/src/storage/format/orc/file-version.h
new file mode 100644
index 0000000..25d2e89
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/file-version.h
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_FILE_VERSION_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_FILE_VERSION_H_
+
+#include <cstdint>
+#include <sstream>
+#include <string>
+
+namespace orc {
+
+class FileVersion {
+ private:
+  uint32_t majorVersion;
+  uint32_t minorVersion;
+
+ public:
+  FileVersion(uint32_t major, uint32_t minor)
+      : majorVersion(major), minorVersion(minor) {}
+
+  uint32_t getMajor() const { return this->majorVersion; }
+
+  uint32_t getMinor() const { return this->minorVersion; }
+
+  bool operator==(const FileVersion& right) const {
+    return this->majorVersion == right.getMajor() &&
+           this->minorVersion == right.getMinor();
+  }
+
+  bool operator!=(const FileVersion& right) const { return !(*this == right); }
+
+  std::string toString() const {
+    std::stringstream ss;
+    ss << getMajor() << '.' << getMinor();
+    return ss.str();
+  }
+};
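+
+// Illustrative usage sketch:
+//
+//   FileVersion v(0, 12);
+//   v.getMajor();   // 0
+//   v.toString();   // "0.12"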
+
+}  // namespace orc
+
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_FILE_VERSION_H_
diff --git a/depends/storage/src/storage/format/orc/input-stream.cc b/depends/storage/src/storage/format/orc/input-stream.cc
new file mode 100644
index 0000000..c523c01
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/input-stream.cc
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cerrno>
+
+#include "dbcommon/filesystem/file-system.h"
+#include "storage/format/orc/exceptions.h"
+#include "storage/format/orc/input-stream.h"
+
+namespace orc {
+
+std::unique_ptr<InputStream> readFile(dbcommon::FileSystem *fs,
+                                      const std::string &path) {
+  return std::unique_ptr<InputStream>(new GeneralFileInputStream(fs, path));
+}
+
+}  // namespace orc
diff --git a/depends/storage/src/storage/format/orc/input-stream.h b/depends/storage/src/storage/format/orc/input-stream.h
new file mode 100644
index 0000000..1d0550f
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/input-stream.h
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_INPUT_STREAM_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_INPUT_STREAM_H_
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cassert>
+#include <cerrno>
+#include <memory>
+#include <string>
+
+#include "dbcommon/filesystem/file-system-manager.h"
+#include "dbcommon/filesystem/file-system.h"
+#include "storage/format/orc/exceptions.h"
+
+// The top level interface to ORC.
+namespace orc {
+
+// An abstract interface for providing ORC readers a stream of bytes.
+class InputStream {
+ public:
+  InputStream() {}
+  virtual ~InputStream() {}
+
+  // Get the total length of the file in bytes.
+  virtual uint64_t getLength() const = 0;
+
+  // Get the natural size for reads.
+  // @return the number of bytes that should be read at once
+  virtual uint64_t getNaturalReadSize() const = 0;
+
+  // Read length bytes from the file starting at offset into
+  // the buffer starting at buf.
+  // @param buf the starting position of a buffer.
+  // @param length the number of bytes to read.
+  // @param offset the position in the stream to read from.
+  virtual void read(void* buf, uint64_t length, uint64_t offset) = 0;
+
+  // Get the name of the stream for error messages.
+  virtual const std::string& getName() const = 0;
+
+  virtual void readBloomFilter(void* buf, uint64_t length, uint64_t offset) = 0;
+};
+
+class GeneralFileInputStream : public InputStream {
+ public:
+  GeneralFileInputStream(dbcommon::FileSystem* fs, std::string fileName)
+      : fs(fs), fileName(fileName) {
+    file = fs->open(fileName.c_str(), O_RDONLY);
+    totalLength = fs->getFileLength(fileName.c_str());
+  }
+
+  virtual ~GeneralFileInputStream() {}
+
+  uint64_t getLength() const override { return totalLength; }
+
+  uint64_t getNaturalReadSize() const override { return 128 * 1024; }
+
+  void read(void* buf, uint64_t length, uint64_t offset) override {
+    assert(buf != nullptr);
+
+    fs->seek(file.get(), offset);
+    int bytesRead = fs->read(file.get(), buf, length);
+  }
+
+  void readBloomFilter(void* buf, uint64_t length, uint64_t offset) override {
+    assert(buf != nullptr);
+    if (!bloomFilterHandler)
+      bloomFilterHandler = fs->open(fileName.c_str(), O_RDONLY);
+    fs->seek(bloomFilterHandler.get(), offset);
+    int bytesRead = fs->read(bloomFilterHandler.get(), buf, length);
+  }
+
+  const std::string& getName() const override { return fileName; }
+
+ private:
+  std::string fileName;
+  std::unique_ptr<dbcommon::File> file = nullptr;
+  std::unique_ptr<dbcommon::File> bloomFilterHandler = nullptr;
+  uint64_t totalLength = 0;
+  dbcommon::FileSystem* fs = nullptr;
+};
+
+std::unique_ptr<InputStream> readFile(dbcommon::FileSystem* fs,
+                                      const std::string& path);
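+
+// Illustrative usage sketch, assuming `fs` points at an already-connected
+// dbcommon::FileSystem and an ORC file exists at the (hypothetical) path:
+//
+//   std::unique_ptr<InputStream> in = readFile(fs, "/tmp/example.orc");
+//   std::vector<char> buf(in->getNaturalReadSize());
+//   in->read(buf.data(), buf.size(), 0);  // read the first chunk of the file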
+
+}  // end of namespace orc
+
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_INPUT_STREAM_H_
diff --git a/depends/storage/src/storage/format/orc/int128.cc b/depends/storage/src/storage/format/orc/int128.cc
new file mode 100644
index 0000000..fa3b0de
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/int128.cc
@@ -0,0 +1,480 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <algorithm>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+
+#include "dbcommon/log/logger.h"
+#include "storage/format/orc/int128.h"
+
+namespace orc {
+
+Int128 Int128::maximumValue() {
+  return Int128(0x7fffffffffffffff, 0xfffffffffffffff);
+}
+
+Int128 Int128::minimumValue() {
+  return Int128(static_cast<int64_t>(0x8000000000000000), 0x0);
+}
+
+Int128::Int128(const std::string& str) {
+  lowbits = 0;
+  highbits = 0;
+  size_t length = str.length();
+  if (length > 0) {
+    bool isNegative = str[0] == '-';
+    size_t posn = isNegative ? 1 : 0;
+    while (posn < length) {
+      size_t group = std::min(static_cast<size_t>(18), length - posn);
+      int64_t chunk = std::stoll(str.substr(posn, group));
+      int64_t multiple = 1;
+      for (size_t i = 0; i < group; ++i) {
+        multiple *= 10;
+      }
+      *this *= multiple;
+      *this += chunk;
+      posn += group;
+    }
+    if (isNegative) {
+      negate();
+    }
+  }
+}
+
+Int128& Int128::operator*=(const Int128& right) {
+  const uint64_t INT_MASK = 0xffffffff;
+  const uint64_t CARRY_BIT = 1l << 32;
+
+  // Break the left and right numbers into 32 bit chunks
+  // so that we can multiply them without overflow.
+  uint64_t L0 = static_cast<uint64_t>(highbits) >> 32;
+  uint64_t L1 = static_cast<uint64_t>(highbits) & INT_MASK;
+  uint64_t L2 = lowbits >> 32;
+  uint64_t L3 = lowbits & INT_MASK;
+  uint64_t R0 = static_cast<uint64_t>(right.highbits) >> 32;
+  uint64_t R1 = static_cast<uint64_t>(right.highbits) & INT_MASK;
+  uint64_t R2 = right.lowbits >> 32;
+  uint64_t R3 = right.lowbits & INT_MASK;
+
+  uint64_t product = L3 * R3;
+  lowbits = product & INT_MASK;
+  uint64_t sum = product >> 32;
+  product = L2 * R3;
+  sum += product;
+  highbits = sum < product ? CARRY_BIT : 0;
+  product = L3 * R2;
+  sum += product;
+  if (sum < product) {
+    highbits += CARRY_BIT;
+  }
+  lowbits += sum << 32;
+  highbits += static_cast<int64_t>(sum >> 32);
+  highbits += L1 * R3 + L2 * R2 + L3 * R1;
+  highbits += (L0 * R3 + L1 * R2 + L2 * R1 + L3 * R0) << 32;
+  return *this;
+}
+
+// Expands the given value into an array of ints so that we can work on
+// it. The array holds the absolute value of the number and the wasNegative
+// flag is set appropriately. Leading zeros are removed from the value.
+// @param array an array of length 4 to set with the value
+// @param wasNegative a flag for whether the value was originally negative
+// @return the number of entries of the array that are used
+int64_t Int128::fillInArray(uint32_t* array, bool& wasNegative) const {
+  uint64_t high;
+  uint64_t low;
+  if (highbits < 0) {
+    low = ~lowbits + 1;
+    high = static_cast<uint64_t>(~highbits);
+    if (low == 0) {
+      high += 1;
+    }
+    wasNegative = true;
+  } else {
+    low = lowbits;
+    high = static_cast<uint64_t>(highbits);
+    wasNegative = false;
+  }
+  if (high != 0) {
+    if (high > UINT32_MAX) {
+      array[0] = static_cast<uint32_t>(high >> 32);
+      array[1] = static_cast<uint32_t>(high);
+      array[2] = static_cast<uint32_t>(low >> 32);
+      array[3] = static_cast<uint32_t>(low);
+      return 4;
+    } else {
+      array[0] = static_cast<uint32_t>(high);
+      array[1] = static_cast<uint32_t>(low >> 32);
+      array[2] = static_cast<uint32_t>(low);
+      return 3;
+    }
+  } else if (low >= UINT32_MAX) {
+    array[0] = static_cast<uint32_t>(low >> 32);
+    array[1] = static_cast<uint32_t>(low);
+    return 2;
+  } else if (low == 0) {
+    return 0;
+  } else {
+    array[0] = static_cast<uint32_t>(low);
+    return 1;
+  }
+}
+
+// Find the last set bit in a 32 bit integer. Bit 1 is the LSB and bit 32 is
+// the MSB. We can replace this with the bsrq asm instruction on x64.
+int64_t fls(uint32_t x) {
+  int64_t bitpos = 0;
+  while (x) {
+    x >>= 1;
+    bitpos += 1;
+  }
+  return bitpos;
+}
+
+// Shift the number in the array left by bits positions.
+// @param array the number to shift, must have length elements
+// @param length the number of entries in the array
+// @param bits the number of bits to shift (0 <= bits < 32)
+void shiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) {
+  if (length > 0 && bits != 0) {
+    for (int64_t i = 0; i < length - 1; ++i) {
+      array[i] = (array[i] << bits) | (array[i + 1] >> (32 - bits));
+    }
+    array[length - 1] <<= bits;
+  }
+}
+
+// Shift the number in the array right by bits positions.
+// @param array the number to shift, must have length elements
+// @param length the number of entries in the array
+// @param bits the number of bits to shift (0 <= bits < 32)
+void shiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
+  if (length > 0 && bits != 0) {
+    for (int64_t i = length - 1; i > 0; --i) {
+      array[i] = (array[i] >> bits) | (array[i - 1] << (32 - bits));
+    }
+    array[0] >>= bits;
+  }
+}
+
+// Fix the signs of the result and remainder at the end of the division
+// based on the signs of the dividend and divisor.
+void fixDivisionSigns(Int128& result, Int128& remainder,  // NOLINT
+                      bool dividendWasNegative, bool divisorWasNegative) {
+  if (dividendWasNegative != divisorWasNegative) {
+    result.negate();
+  }
+  if (dividendWasNegative) {
+    remainder.negate();
+  }
+}
+
+// Build an Int128 from a list of ints.
+void buildFromArray(Int128& value, uint32_t* array, int64_t length) {  // NOLINT
+  switch (length) {
+    case 0:
+      value = 0;
+      break;
+    case 1:
+      value = array[0];
+      break;
+    case 2:
+      value = Int128(0, (static_cast<uint64_t>(array[0]) << 32) + array[1]);
+      break;
+    case 3:
+      value =
+          Int128(array[0], (static_cast<uint64_t>(array[1]) << 32) + array[2]);
+      break;
+    case 4:
+      value = Int128((static_cast<int64_t>(array[0]) << 32) + array[1],
+                     (static_cast<uint64_t>(array[2]) << 32) + array[3]);
+      break;
+    case 5:
+      if (array[0] != 0) {
+        LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Can't build Int128 with 5 ints.");
+      }
+      value = Int128((static_cast<int64_t>(array[1]) << 32) + array[2],
+                     (static_cast<uint64_t>(array[3]) << 32) + array[4]);
+      break;
+    default:
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR,
+                "Unsupported length for building Int128");
+  }
+}
+
+// Do a division where the divisor fits into a single 32 bit value.
+Int128 singleDivide(uint32_t* dividend, int64_t dividendLength,
+                    uint32_t divisor, Int128& remainder,  // NOLINT
+                    bool dividendWasNegative,             // NOLINT
+                    bool divisorWasNegative) {
+  uint64_t r = 0;
+  uint32_t resultArray[5];
+  for (int64_t j = 0; j < dividendLength; j++) {
+    r <<= 32;
+    r += dividend[j];
+    resultArray[j] = static_cast<uint32_t>(r / divisor);
+    r %= divisor;
+  }
+  Int128 result;
+  buildFromArray(result, resultArray, dividendLength);
+  remainder = static_cast<int64_t>(r);
+  fixDivisionSigns(result, remainder, dividendWasNegative, divisorWasNegative);
+  return result;
+}
+
+Int128 Int128::divide(const Int128& divisor, Int128& remainder) const {
+  // Split the dividend and divisor into integer pieces so that we can
+  // work on them.
+  uint32_t dividendArray[5];
+  uint32_t divisorArray[4];
+  bool dividendWasNegative;
+  bool divisorWasNegative;
+  // leave an extra zero before the dividend
+  dividendArray[0] = 0;
+  int64_t dividendLength =
+      fillInArray(dividendArray + 1, dividendWasNegative) + 1;
+  int64_t divisorLength = divisor.fillInArray(divisorArray, divisorWasNegative);
+
+  // Handle some of the easy cases.
+  if (dividendLength <= divisorLength) {
+    remainder = *this;
+    return 0;
+  } else if (divisorLength == 0) {
+    LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Division by 0 in Int128");
+  } else if (divisorLength == 1) {
+    return singleDivide(dividendArray, dividendLength, divisorArray[0],
+                        remainder, dividendWasNegative, divisorWasNegative);
+  }
+
+  int64_t resultLength = dividendLength - divisorLength;
+  uint32_t resultArray[4];
+
+  // Normalize by shifting both left by the same number of bits so that
+  // the digit guessing is better. The requirement is that
+  // divisorArray[0] is greater than 2**31.
+  int64_t normalizeBits = 32 - fls(divisorArray[0]);
+  shiftArrayLeft(divisorArray, divisorLength, normalizeBits);
+  shiftArrayLeft(dividendArray, dividendLength, normalizeBits);
+
+  // compute each digit in the result
+  for (int64_t j = 0; j < resultLength; ++j) {
+    // Guess the next digit. At worst it is two too large
+    uint32_t guess = UINT32_MAX;
+    uint64_t highDividend =
+        static_cast<uint64_t>(dividendArray[j]) << 32 | dividendArray[j + 1];
+    if (dividendArray[j] != divisorArray[0]) {
+      guess = static_cast<uint32_t>(highDividend / divisorArray[0]);
+    }
+
+    // catch all of the cases where guess is two too large and most of the
+    // cases where it is one too large
+    uint32_t rhat = static_cast<uint32_t>(
+        highDividend - guess * static_cast<uint64_t>(divisorArray[0]));
+    while (static_cast<uint64_t>(divisorArray[1]) * guess >
+           (static_cast<uint64_t>(rhat) << 32) + dividendArray[j + 2]) {
+      guess -= 1;
+      rhat += divisorArray[0];
+      if (static_cast<uint64_t>(rhat) < divisorArray[0]) {
+        break;
+      }
+    }
+
+    // subtract off the guess * divisor from the dividend
+    uint64_t mult = 0;
+    for (int64_t i = divisorLength - 1; i >= 0; --i) {
+      mult += static_cast<uint64_t>(guess) * divisorArray[i];
+      uint32_t prev = dividendArray[j + i + 1];
+      dividendArray[j + i + 1] -= static_cast<uint32_t>(mult);
+      mult >>= 32;
+      if (dividendArray[j + i + 1] > prev) {
+        mult += 1;
+      }
+    }
+    uint32_t prev = dividendArray[j];
+    dividendArray[j] -= static_cast<uint32_t>(mult);
+
+    // if guess was too big, we add back divisor
+    if (dividendArray[j] > prev) {
+      guess -= 1;
+      uint32_t carry = 0;
+      for (int64_t i = divisorLength - 1; i >= 0; --i) {
+        uint64_t sum = static_cast<uint64_t>(divisorArray[i]) +
+                       dividendArray[j + i + 1] + carry;
+        dividendArray[j + i + 1] = static_cast<uint32_t>(sum);
+        carry = static_cast<uint32_t>(sum >> 32);
+      }
+      dividendArray[j] += carry;
+    }
+
+    resultArray[j] = guess;
+  }
+
+  // denormalize the remainder
+  shiftArrayRight(dividendArray, dividendLength, normalizeBits);
+
+  // return result and remainder
+  Int128 result;
+  buildFromArray(result, resultArray, resultLength);
+  buildFromArray(remainder, dividendArray, dividendLength);
+  fixDivisionSigns(result, remainder, dividendWasNegative, divisorWasNegative);
+  return result;
+}
+
+std::string Int128::toString() const {
+  // 10**18 - the largest power of 10 less than 63 bits
+  const Int128 tenTo18(0xde0b6b3a7640000);
+  // 10**36
+  const Int128 tenTo36(0xc097ce7bc90715, 0xb34b9f1000000000);
+  Int128 remainder;
+  std::stringstream buf;
+  bool needFill = false;
+
+  // get anything above 10**36 and print it
+  Int128 top = divide(tenTo36, remainder);
+  if (top != 0) {
+    buf << top.toLong();
+    remainder.abs();
+    needFill = true;
+  }
+
+  // now get anything above 10**18 and print it
+  Int128 tail;
+  top = remainder.divide(tenTo18, tail);
+  if (needFill || top != 0) {
+    if (needFill) {
+      buf << std::setw(18) << std::setfill('0');
+    } else {
+      needFill = true;
+      tail.abs();
+    }
+    buf << top.toLong();
+  }
+
+  // finally print the tail, which is less than 10**18
+  if (needFill) {
+    buf << std::setw(18) << std::setfill('0');
+  }
+  buf << tail.toLong();
+  return buf.str();
+}
+
+std::string Int128::toDecimalString(int32_t scale) const {
+  std::string str = toString();
+  if (scale == 0) {
+    return str;
+  } else if (*this < 0) {
+    int32_t len = static_cast<int32_t>(str.length());
+    if (len - 1 > scale) {
+      return str.substr(0, static_cast<size_t>(len - scale)) + "." +
+             str.substr(static_cast<size_t>(len - scale),
+                        static_cast<size_t>(scale));
+    } else if (len - 1 == scale) {
+      return "-0." + str.substr(1, std::string::npos);
+    } else {
+      std::string result = "-0.";
+      for (int32_t i = 0; i < scale - len + 1; ++i) {
+        result += "0";
+      }
+      return result + str.substr(1, std::string::npos);
+    }
+  } else {
+    int32_t len = static_cast<int32_t>(str.length());
+    if (len > scale) {
+      return str.substr(0, static_cast<size_t>(len - scale)) + "." +
+             str.substr(static_cast<size_t>(len - scale),
+                        static_cast<size_t>(scale));
+    } else if (len == scale) {
+      return "0." + str;
+    } else {
+      std::string result = "0.";
+      for (int32_t i = 0; i < scale - len; ++i) {
+        result += "0";
+      }
+      return result + str;
+    }
+  }
+}
+
+std::string Int128::toHexString() const {
+  std::stringstream buf;
+  buf << std::hex << "0x" << std::setw(16) << std::setfill('0') << highbits
+      << std::setw(16) << std::setfill('0') << lowbits;
+  return buf.str();
+}
+
+const static int32_t MAX_PRECISION_64 = 18;                   // NOLINT
+const static int64_t POWERS_OF_TEN[MAX_PRECISION_64 + 1] = {  // NOLINT
+    1,
+    10,
+    100,
+    1000,
+    10000,
+    100000,
+    1000000,
+    10000000,
+    100000000,
+    1000000000,
+    10000000000,
+    100000000000,
+    1000000000000,
+    10000000000000,
+    100000000000000,
+    1000000000000000,
+    10000000000000000,
+    100000000000000000,
+    1000000000000000000};
+
+Int128 scaleUpInt128ByPowerOfTen(Int128 value, int32_t power,
+                                 bool& overflow) {  // NOLINT
+  overflow = false;
+  Int128 remainder;
+
+  while (power > 0) {
+    int32_t step = std::min(power, MAX_PRECISION_64);
+    if (value > 0 &&
+        Int128::maximumValue().divide(POWERS_OF_TEN[step], remainder) < value) {
+      overflow = true;
+      return Int128::maximumValue();
+    } else if (value < 0 && Int128::minimumValue().divide(POWERS_OF_TEN[step],
+                                                          remainder) > value) {
+      overflow = true;
+      return Int128::minimumValue();
+    }
+
+    value *= POWERS_OF_TEN[step];
+    power -= step;
+  }
+
+  return value;
+}
+
+Int128 scaleDownInt128ByPowerOfTen(Int128 value, int32_t power) {
+  Int128 remainder;
+  while (power > 0) {
+    int32_t step = std::min(std::abs(power), MAX_PRECISION_64);
+    value = value.divide(POWERS_OF_TEN[step], remainder);
+    power -= step;
+  }
+  return value;
+}
+
+}  // namespace orc
diff --git a/depends/storage/src/storage/format/orc/int128.h b/depends/storage/src/storage/format/orc/int128.h
new file mode 100644
index 0000000..5949dee
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/int128.h
@@ -0,0 +1,304 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_INT128_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_INT128_H_
+
+#include <stdexcept>
+#include <string>
+
+#include "dbcommon/log/logger.h"
+
+namespace orc {
+
+// Represents a signed 128-bit integer in two's complement.
+// Calculations wrap around and overflow is ignored.
+//
+// For a discussion of the algorithms, look at Knuth's volume 2,
+// Semi-numerical Algorithms section 4.3.1.
+class Int128 {
+ public:
+  Int128() {
+    highbits = 0;
+    lowbits = 0;
+  }
+
+  // Convert a signed 64 bit value into an Int128.
+  Int128(int64_t right) {  // NOLINT
+    if (right >= 0) {
+      highbits = 0;
+      lowbits = static_cast<uint64_t>(right);
+    } else {
+      highbits = -1;
+      lowbits = static_cast<uint64_t>(right);
+    }
+  }
+
+  // Create from the twos complement representation.
+  Int128(int64_t high, uint64_t low) {
+    highbits = high;
+    lowbits = low;
+  }
+
+  // Parse the number from a base 10 string representation.
+  explicit Int128(const std::string&);
+
+  // Maximum positive value allowed by the type.
+  static Int128 maximumValue();
+
+  // Minimum negative value allowed by the type.
+  static Int128 minimumValue();
+
+  Int128& negate() {
+    lowbits = ~lowbits + 1;
+    highbits = ~highbits;
+    if (lowbits == 0) {
+      highbits += 1;
+    }
+    return *this;
+  }
+
+  Int128& abs() {
+    if (highbits < 0) {
+      negate();
+    }
+    return *this;
+  }
+
+  Int128& invert() {
+    lowbits = ~lowbits;
+    highbits = ~highbits;
+    return *this;
+  }
+
+  // Add a number to this one. The result is truncated to 128 bits.
+  // @param right the number to add
+  // @return *this
+  Int128& operator+=(const Int128& right) {
+    uint64_t sum = lowbits + right.lowbits;
+    highbits += right.highbits;
+    if (sum < lowbits) {
+      highbits += 1;
+    }
+    lowbits = sum;
+    return *this;
+  }
+
+  // Subtract a number from this one. The result is truncated to 128 bits.
+  // @param right the number to subtract
+  // @return *this
+  Int128& operator-=(const Int128& right) {
+    uint64_t diff = lowbits - right.lowbits;
+    highbits -= right.highbits;
+    if (diff > lowbits) {
+      highbits -= 1;
+    }
+    lowbits = diff;
+    return *this;
+  }
+
+  // Multiply this number by a number. The result is truncated to 128 bits.
+  // @param right the number to multiply by
+  // @return *this
+  Int128& operator*=(const Int128& right);
+
+  // Divide this number by right and return the result. This operation is
+  // not destructive.
+  //
+  // The answer rounds to zero. Signs work like:
+  //    21 /  5 ->  4,  1
+  //   -21 /  5 -> -4, -1
+  //    21 / -5 -> -4,  1
+  //   -21 / -5 ->  4, -1
+  // @param right the number to divide by
+  // @param remainder the remainder after the division
+  Int128 divide(const Int128& right, Int128& remainder) const;  // NOLINT
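+  // For example (an illustrative sketch matching the sign rules above):
+  //   Int128 remainder;
+  //   Int128 quotient = Int128(-21).divide(Int128(5), remainder);
+  //   // quotient == -4, remainder == -1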
+
+  // Bitwise or between two Int128.
+  // @param right the number to or in
+  // @return *this
+  Int128& operator|=(const Int128& right) {
+    lowbits |= right.lowbits;
+    highbits |= right.highbits;
+    return *this;
+  }
+
+  // Bitwise and between two Int128.
+  // @param right the number to and in
+  // @return *this
+  Int128& operator&=(const Int128& right) {
+    lowbits &= right.lowbits;
+    highbits &= right.highbits;
+    return *this;
+  }
+
+  // Shift left by the given number of bits.
+  // Values larger than 2**127 will shift into the sign bit.
+  Int128& operator<<=(uint32_t bits) {
+    if (bits != 0) {
+      if (bits < 64) {
+        highbits <<= bits;
+        highbits |= (lowbits >> (64 - bits));
+        lowbits <<= bits;
+      } else if (bits < 128) {
+        highbits = static_cast<int64_t>(lowbits) << (bits - 64);
+        lowbits = 0;
+      } else {
+        highbits = 0;
+        lowbits = 0;
+      }
+    }
+    return *this;
+  }
+
+  // Shift right by the given number of bits. Negative values will
+  // sign extend and fill with one bits.
+  Int128& operator>>=(uint32_t bits) {
+    if (bits != 0) {
+      if (bits < 64) {
+        lowbits >>= bits;
+        lowbits |= static_cast<uint64_t>(highbits << (64 - bits));
+        highbits =
+            static_cast<int64_t>(static_cast<uint64_t>(highbits) >> bits);
+      } else if (bits < 128) {
+        lowbits = static_cast<uint64_t>(highbits >> (bits - 64));
+        highbits = highbits >= 0 ? 0 : -1l;
+      } else {
+        highbits = highbits >= 0 ? 0 : -1l;
+        lowbits = static_cast<uint64_t>(highbits);
+      }
+    }
+    return *this;
+  }
+
+  bool operator==(const Int128& right) const {
+    return highbits == right.highbits && lowbits == right.lowbits;
+  }
+
+  bool operator!=(const Int128& right) const {
+    return highbits != right.highbits || lowbits != right.lowbits;
+  }
+
+  bool operator<(const Int128& right) const {
+    if (highbits == right.highbits) {
+      return lowbits < right.lowbits;
+    } else {
+      return highbits < right.highbits;
+    }
+  }
+
+  bool operator<=(const Int128& right) const {
+    if (highbits == right.highbits) {
+      return lowbits <= right.lowbits;
+    } else {
+      return highbits <= right.highbits;
+    }
+  }
+
+  bool operator>(const Int128& right) const {
+    if (highbits == right.highbits) {
+      return lowbits > right.lowbits;
+    } else {
+      return highbits > right.highbits;
+    }
+  }
+
+  bool operator>=(const Int128& right) const {
+    if (highbits == right.highbits) {
+      return lowbits >= right.lowbits;
+    } else {
+      return highbits >= right.highbits;
+    }
+  }
+
+  uint32_t hash() const {
+    return static_cast<uint32_t>(highbits >> 32) ^
+           static_cast<uint32_t>(highbits) ^
+           static_cast<uint32_t>(lowbits >> 32) ^
+           static_cast<uint32_t>(lowbits);
+  }
+
+  // Does this value fit into a long?
+  bool fitsInLong() const {
+    switch (highbits) {
+      case 0:
+        return !(lowbits & LONG_SIGN_BIT);
+      case -1:
+        return lowbits & LONG_SIGN_BIT;
+      default:
+        return false;
+    }
+  }
+
+  // Convert the value to a long, raising an internal error if it does not fit.
+  int64_t toLong() const {
+    if (fitsInLong()) {
+      return static_cast<int64_t>(lowbits);
+    }
+    LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Int128 too large to convert to long");
+  }
+
+  // Return the base 10 string representation of the integer.
+  std::string toString() const;
+
+  // Return the base 10 string representation with a decimal point,
+  // the given number of places after the decimal.
+  std::string toDecimalString(int32_t scale = 0) const;
+
+  // Return the base 16 string representation of the two's complement with
+  // a prefix of "0x".
+  // Int128(-1).toHexString() = "0xffffffffffffffffffffffffffffffff".
+  std::string toHexString() const;
+
+  // Get the high bits of the twos complement representation of the number.
+  int64_t getHighBits() { return highbits; }
+
+  // Get the low bits of the twos complement representation of the number.
+  uint64_t getLowBits() { return lowbits; }
+
+  // Represent the absolute number as a list of uint32.
+  // Visible for testing only.
+  // @param array the array that is set to the value of the number
+  // @param wasNegative set to true if the original number was negative
+  // @return the number of elements that were set in the array (0 to 4)
+  int64_t fillInArray(uint32_t* array, bool& wasNegative) const;  // NOLINT
+
+ private:
+  static const uint64_t LONG_SIGN_BIT = 0x8000000000000000u;
+  int64_t highbits;
+  uint64_t lowbits;
+};
+
+/**
+ * Scales up an Int128 value
+ * @param value the Int128 value to scale
+ * @param power the number of powers of ten to scale up by; the result is
+ *        undefined for a negative power
+ * @param overflow returns whether the result overflows or not
+ * @return the scaled value
+ */
+Int128 scaleUpInt128ByPowerOfTen(Int128 value, int32_t power, bool& overflow);
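+// For example (an illustrative sketch): scaling Int128(123) up by power 2
+// yields 12300 with overflow == false, while scaling Int128::maximumValue()
+// up by any positive power sets overflow and returns Int128::maximumValue().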
+/**
+ * Scales down an Int128 value
+ * @param value the Int128 value to scale
+ * @param power the number of powers of ten to scale down by; the result is
+ *        undefined for a negative power
+ * @return the scaled value
+ */
+Int128 scaleDownInt128ByPowerOfTen(Int128 value, int32_t power);
+}  // end of namespace orc
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_INT128_H_
diff --git a/depends/storage/src/storage/format/orc/lzo-decompressor.cc b/depends/storage/src/storage/format/orc/lzo-decompressor.cc
new file mode 100644
index 0000000..2752fec
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/lzo-decompressor.cc
@@ -0,0 +1,396 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <sstream>
+#include <string>
+
+#include "dbcommon/log/logger.h"
+#include "storage/format/orc/exceptions.h"
+#include "storage/format/orc/lzo-decompressor.h"
+
+namespace orc {
+
+static const int32_t DEC_32_TABLE[] = {4, 1, 2, 1, 4, 4, 4, 4};
+static const int32_t DEC_64_TABLE[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+static const int32_t SIZE_OF_SHORT = 2;
+static const int32_t SIZE_OF_INT = 4;
+static const int32_t SIZE_OF_LONG = 8;
+
+static std::string toHex(uint64_t val) {
+  std::ostringstream out;
+  out << "0x" << std::hex << val;
+  return out.str();
+}
+
+static std::string toString(int64_t val) {
+  std::ostringstream out;
+  out << val;
+  return out.str();
+}
+
+class MalformedInputException : public ParseError {
+ public:
+  explicit MalformedInputException(int64_t off)
+      : ParseError("MalformedInputException at " + toString(off)) {}
+
+  MalformedInputException(int64_t off, const std::string &msg)
+      : ParseError("MalformedInputException " + msg + " at " + toString(off)) {}
+
+  MalformedInputException(const MalformedInputException &other)
+      : ParseError(other.what()) {}
+
+  virtual ~MalformedInputException() noexcept;
+};
+
+MalformedInputException::~MalformedInputException() noexcept {
+  // PASS
+}
+
+uint64_t lzoDecompress(const char *inputAddress, const char *inputLimit,
+                       char *outputAddress, char *outputLimit) {
+  // nothing compresses to nothing
+  if (inputAddress == inputLimit) {
+    return 0;
+  }
+
+  // maximum offset in buffers to which it's safe to write long-at-a-time
+  char *const fastOutputLimit = outputLimit - SIZE_OF_LONG;
+
+  // LZO can concatenate two blocks together, so decode until the input data
+  // is consumed
+  const char *input = inputAddress;
+  char *output = outputAddress;
+  while (input < inputLimit) {
+    //
+    // Note: For safety, some of the code below may stop decoding early or
+    // skip decoding when input is not available.  This keeps the code
+    // safe, and since LZO requires an explicit "stop" command, the decoder
+    // will still throw an exception.
+    //
+
+    bool firstCommand = true;
+    uint32_t lastLiteralLength = 0;
+    while (true) {
+      if (input >= inputLimit) {
+        LOG_ERROR(ERRCODE_INTERNAL_ERROR, "MalformedInputException at %ld",
+                  input - inputAddress);
+      }
+      uint32_t command = *(input++) & 0xFF;
+      if (command == 0x11) {
+        break;
+      }
+
+      // Commands are described using a bit pattern notation:
+      // 0: bit is not set
+      // 1: bit is set
+      // L: part of literal length
+      // P: part of match offset position
+      // M: part of match length
+      // ?: see documentation in command decoder
+
+      int32_t matchLength;
+      int32_t matchOffset;
+      uint32_t literalLength;
+      if ((command & 0xf0) == 0) {
+        if (lastLiteralLength == 0) {
+          // 0b0000_LLLL (0bLLLL_LLLL)*
+
+          // copy length :: fixed
+          //   0
+          matchOffset = 0;
+
+          // copy offset :: fixed
+          //   0
+          matchLength = 0;
+
+          // literal length - 3 :: variable bits :: valid range [4..]
+          //   3 + variableLength(command bits [0..3], 4)
+          literalLength = command & 0xf;
+          if (literalLength == 0) {
+            literalLength = 0xf;
+
+            uint32_t nextByte = 0;
+            while (input < inputLimit && (nextByte = *(input++) & 0xFF) == 0) {
+              literalLength += 0xff;
+            }
+            literalLength += nextByte;
+          }
+          literalLength += 3;
+        } else if (lastLiteralLength <= 3) {
+          // 0b0000_PPLL 0bPPPP_PPPP
+
+          // copy length: fixed
+          //   3
+          matchLength = 3;
+
+          // copy offset :: 12 bits :: valid range [2048..3071]
+          //   [0..1] from command [2..3]
+          //   [2..9] from trailer [0..7]
+          //   [10] unset
+          //   [11] set
+          if (input >= inputLimit) {
+            LOG_ERROR(ERRCODE_INTERNAL_ERROR, "MalformedInputException at %ld",
+                      input - inputAddress);
+          }
+          matchOffset = (command & 0xc) >> 2;
+          matchOffset |= (*(input++) & 0xFF) << 2;
+          matchOffset |= 0x800;
+
+          // literal length :: 2 bits :: valid range [0..3]
+          //   [0..1] from command [0..1]
+          literalLength = (command & 0x3);
+        } else {
+          // 0b0000_PPLL 0bPPPP_PPPP
+
+          // copy length :: fixed
+          //   2
+          matchLength = 2;
+
+          // copy offset :: 10 bits :: valid range [0..1023]
+          //   [0..1] from command [2..3]
+          //   [2..9] from trailer [0..7]
+          if (input >= inputLimit) {
+            LOG_ERROR(ERRCODE_INTERNAL_ERROR, "MalformedInputException at %ld",
+                      input - inputAddress);
+          }
+          matchOffset = (command & 0xc) >> 2;
+          matchOffset |= (*(input++) & 0xFF) << 2;
+
+          // literal length :: 2 bits :: valid range [0..3]
+          //   [0..1] from command [0..1]
+          literalLength = (command & 0x3);
+        }
+      } else if (firstCommand) {
+        // first command has special handling when high nibble is set
+        matchLength = 0;
+        matchOffset = 0;
+        literalLength = command - 17;
+      } else if ((command & 0xf0) == 0x10) {
+        // 0b0001_?MMM (0bMMMM_MMMM)* 0bPPPP_PPPP_PPPP_PPLL
+
+        // copy length - 2 :: variable bits :: valid range [3..]
+        //   2 + variableLength(command bits [0..2], 3)
+        matchLength = command & 0x7;
+        if (matchLength == 0) {
+          matchLength = 0x7;
+
+          int32_t nextByte = 0;
+          while (input < inputLimit && (nextByte = *(input++) & 0xFF) == 0) {
+            matchLength += 0xff;
+          }
+          matchLength += nextByte;
+        }
+        matchLength += 2;
+
+        // read trailer
+        if (input + SIZE_OF_SHORT > inputLimit) {
+          LOG_ERROR(ERRCODE_INTERNAL_ERROR, "MalformedInputException at %ld",
+                    input - inputAddress);
+        }
+        uint32_t trailer = *reinterpret_cast<const uint16_t *>(input) & 0xFFFF;
+        input += SIZE_OF_SHORT;
+
+        // copy offset :: 16 bits :: valid range [32767..49151]
+        //   [0..13] from trailer [2..15]
+        //   [14] if command bit [3] unset
+        //   [15] if command bit [3] set
+        matchOffset = trailer >> 2;
+        if ((command & 0x8) == 0) {
+          matchOffset |= 0x4000;
+        } else {
+          matchOffset |= 0x8000;
+        }
+        matchOffset--;
+
+        // literal length :: 2 bits :: valid range [0..3]
+        //   [0..1] from trailer [0..1]
+        literalLength = trailer & 0x3;
+      } else if ((command & 0xe0) == 0x20) {
+        // 0b001M_MMMM (0bMMMM_MMMM)* 0bPPPP_PPPP_PPPP_PPLL
+
+        // copy length - 2 :: variable bits :: valid range [3..]
+        //   2 + variableLength(command bits [0..4], 5)
+        matchLength = command & 0x1f;
+        if (matchLength == 0) {
+          matchLength = 0x1f;
+
+          int nextByte = 0;
+          while (input < inputLimit && (nextByte = *(input++) & 0xFF) == 0) {
+            matchLength += 0xff;
+          }
+          matchLength += nextByte;
+        }
+        matchLength += 2;
+
+        // read trailer
+        if (input + SIZE_OF_SHORT > inputLimit) {
+          LOG_ERROR(ERRCODE_INTERNAL_ERROR, "MalformedInputException at %ld",
+                    input - inputAddress);
+        }
+        int32_t trailer = *reinterpret_cast<const int16_t *>(input) & 0xFFFF;
+        input += SIZE_OF_SHORT;
+
+        // copy offset :: 14 bits :: valid range [0..16383]
+        //  [0..13] from trailer [2..15]
+        matchOffset = trailer >> 2;
+
+        // literal length :: 2 bits :: valid range [0..3]
+        //   [0..1] from trailer [0..1]
+        literalLength = trailer & 0x3;
+      } else if ((command & 0xc0) != 0) {
+        // 0bMMMP_PPLL 0bPPPP_PPPP
+
+        // copy length - 1 :: 3 bits :: valid range [1..8]
+        //   [0..2] from command [5..7]
+        //   add 1
+        matchLength = (command & 0xe0) >> 5;
+        matchLength += 1;
+
+        // copy offset :: 11 bits :: valid range [0..4095]
+        //   [0..2] from command [2..4]
+        //   [3..10] from trailer [0..7]
+        if (input >= inputLimit) {
+          LOG_ERROR(ERRCODE_INTERNAL_ERROR, "MalformedInputException at %ld",
+                    input - inputAddress);
+        }
+        matchOffset = (command & 0x1c) >> 2;
+        matchOffset |= (*(input++) & 0xFF) << 3;
+
+        // literal length :: 2 bits :: valid range [0..3]
+        //   [0..1] from command [0..1]
+        literalLength = (command & 0x3);
+      } else {
+        LOG_ERROR(ERRCODE_INTERNAL_ERROR,
+                  "MalformedInputException: Invalid LZO command %s at %ld",
+                  toHex(command).c_str(), input - inputAddress - 1);
+      }
+      firstCommand = false;
+
+      // copy match
+      if (matchLength != 0) {
+        // lzo encodes match offset minus one
+        matchOffset++;
+
+        char *matchAddress = output - matchOffset;
+        if (matchAddress < outputAddress ||
+            output + matchLength > outputLimit) {
+          throw MalformedInputException(input - inputAddress);
+        }
+        char *matchOutputLimit = output + matchLength;
+
+        if (output > fastOutputLimit) {
+          // slow match copy
+          while (output < matchOutputLimit) {
+            *(output++) = *(matchAddress++);
+          }
+        } else {
+          // copy repeated sequence
+          if (matchOffset < SIZE_OF_LONG) {
+            // 8 bytes apart so that we can copy long-at-a-time below
+            int32_t increment32 = DEC_32_TABLE[matchOffset];
+            int32_t decrement64 = DEC_64_TABLE[matchOffset];
+
+            output[0] = *matchAddress;
+            output[1] = *(matchAddress + 1);
+            output[2] = *(matchAddress + 2);
+            output[3] = *(matchAddress + 3);
+            output += SIZE_OF_INT;
+            matchAddress += increment32;
+
+            *reinterpret_cast<int32_t *>(output) =
+                *reinterpret_cast<int32_t *>(matchAddress);
+            output += SIZE_OF_INT;
+            matchAddress -= decrement64;
+          } else {
+            *reinterpret_cast<int64_t *>(output) =
+                *reinterpret_cast<int64_t *>(matchAddress);
+            matchAddress += SIZE_OF_LONG;
+            output += SIZE_OF_LONG;
+          }
+
+          if (matchOutputLimit >= fastOutputLimit) {
+            if (matchOutputLimit > outputLimit) {
+              LOG_ERROR(ERRCODE_INTERNAL_ERROR,
+                        "MalformedInputException at %ld", input - inputAddress);
+            }
+
+            while (output < fastOutputLimit) {
+              *reinterpret_cast<int64_t *>(output) =
+                  *reinterpret_cast<int64_t *>(matchAddress);
+              matchAddress += SIZE_OF_LONG;
+              output += SIZE_OF_LONG;
+            }
+
+            while (output < matchOutputLimit) {
+              *(output++) = *(matchAddress++);
+            }
+          } else {
+            while (output < matchOutputLimit) {
+              *reinterpret_cast<int64_t *>(output) =
+                  *reinterpret_cast<int64_t *>(matchAddress);
+              matchAddress += SIZE_OF_LONG;
+              output += SIZE_OF_LONG;
+            }
+          }
+        }
+        output = matchOutputLimit;  // correction in case we over-copied
+      }
+
+      // copy literal
+      char *literalOutputLimit = output + literalLength;
+      if (literalOutputLimit > fastOutputLimit ||
+          input + literalLength > inputLimit - SIZE_OF_LONG) {
+        if (literalOutputLimit > outputLimit) {
+          LOG_ERROR(ERRCODE_INTERNAL_ERROR, "MalformedInputException at %ld",
+                    input - inputAddress);
+        }
+
+        // slow, precise copy
+        memcpy(output, input, literalLength);
+        input += literalLength;
+        output += literalLength;
+      } else {
+        // fast copy. We may over-copy but there's enough room in input
+        // and output to not overrun them
+        do {
+          *reinterpret_cast<int64_t *>(output) =
+              *reinterpret_cast<const int64_t *>(input);
+          input += SIZE_OF_LONG;
+          output += SIZE_OF_LONG;
+        } while (output < literalOutputLimit);
+        // adjust index if we over-copied
+        input -= (output - literalOutputLimit);
+        output = literalOutputLimit;
+      }
+      lastLiteralLength = literalLength;
+    }
+
+    if (input + SIZE_OF_SHORT > inputLimit &&
+        *reinterpret_cast<const int16_t *>(input) != 0) {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "MalformedInputException at %ld",
+                input - inputAddress);
+    }
+    input += SIZE_OF_SHORT;
+  }
+
+  return static_cast<uint64_t>(output - outputAddress);
+}
+
+}  // namespace orc
diff --git a/depends/storage/src/storage/format/orc/lzo-decompressor.h b/depends/storage/src/storage/format/orc/lzo-decompressor.h
new file mode 100644
index 0000000..f163f3e
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/lzo-decompressor.h
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_LZO_DECOMPRESSOR_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_LZO_DECOMPRESSOR_H_
+
+namespace orc {
+
+// Decompress the bytes in to the output buffer.
+// @param inputAddress the start of the input
+// @param inputLimit one past the last byte of the input
+// @param outputAddress the start of the output buffer
+// @param outputLimit one past the last byte of the output buffer
+// @result the number of bytes decompressed
+uint64_t lzoDecompress(const char *inputAddress, const char *inputLimit,
+                       char *outputAddress, char *outputLimit);
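+
+// Illustrative call sketch (the buffer names and sizes below are hypothetical;
+// the caller must already know the decompressed size, as the ORC reader does):
+//
+//   std::vector<char> out(decompressedSize);
+//   uint64_t written = lzoDecompress(compressed, compressed + compressedSize,
+//                                    out.data(), out.data() + out.size());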
+}  // namespace orc
+
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_LZO_DECOMPRESSOR_H_
diff --git a/depends/storage/src/storage/format/orc/orc-format-reader.cc b/depends/storage/src/storage/format/orc/orc-format-reader.cc
new file mode 100644
index 0000000..071623e
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/orc-format-reader.cc
@@ -0,0 +1,278 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "storage/format/orc/orc-format-reader.h"
+
+#include <list>
+#include <memory>
+#include <utility>
+
+#include "dbcommon/common/vector.h"
+#include "dbcommon/common/vector/decimal-vector.h"
+#include "dbcommon/common/vector/list-vector.h"
+#include "dbcommon/common/vector/struct-vector.h"
+#include "dbcommon/common/vector/variable-length-vector.h"
+#include "dbcommon/utils/global.h"
+#include "dbcommon/utils/url.h"
+
+#include "storage/format/orc/input-stream.h"
+
+namespace storage {
+
+void ORCFormatReader::beginRead(
+    dbcommon::FileSystemManagerInterface *fsManager,
+    const univplan::UnivPlanScanFileSplitListList *splits,
+    std::vector<bool> *columnsToRead, uint32_t nTuplesPerBatch,
+    const univplan::UnivPlanExprPolyList *predicateExprs,
+    const dbcommon::TupleDesc *td, bool readStatsOnly) {
+  assert(fsManager != nullptr && splits != nullptr);
+
+  this->fsManager = fsManager;
+  this->splits = splits;
+  this->nTuplesPerBatch = nTuplesPerBatch;
+  this->columnsToRead = columnsToRead;
+
+  if (columnsToRead != nullptr) {
+    std::list<uint64_t> toRead;
+    for (uint32_t i = 0; i < columnsToRead->size(); i++) {
+      if ((*columnsToRead)[i]) {
+        toRead.push_back(i);
+      }
+    }
+
+    opts.include(toRead);
+  }
+
+  opts.setPredicateExprs(predicateExprs);
+  opts.setTupleDesc(td);
+  opts.setReadStatsOnlyFlag(readStatsOnly);
+}
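+
+// Typical call sequence for this reader, shown as a sketch only (the actual
+// caller lives in the scan layer and is not part of this file):
+//
+//   reader.beginRead(fsManager, splits, columnsToRead, nTuplesPerBatch,
+//                    predicateExprs, tupleDesc, readStatsOnly);
+//   while (dbcommon::TupleBatch::uptr tb = reader.read()) {
+//     // consume the batch
+//   }
+//   reader.endRead();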
+
+void ORCFormatReader::startNewSplit() {
+  assert(currentSplitIdx < splits->front()->splits_size());
+
+  std::string splitFilename;
+  splits->front()->splits_filename(currentSplitIdx, &splitFilename);
+  bool reuseInputStream = false;
+  if (currentSplitIdx > 0) {
+    std::string splitFilenamePrev;
+    splits->front()->splits_filename(currentSplitIdx - 1, &splitFilenamePrev);
+    if (splitFilename == splitFilenamePrev) {
+      reuseInputStream = true;
+    }
+  }
+  std::unique_ptr<orc::InputStream> inputStream;
+  if (reuseInputStream) {
+    inputStream = orcReader->ownInputStream();
+  } else {
+    dbcommon::URL url(splitFilename);
+    if (orcReader) {
+      inputStream = orcReader->ownInputStream();
+      inputStream.reset(nullptr);
+    }
+    inputStream = orc::readFile(fsManager->get(url.getNormalizedServiceName()),
+                                url.getPath());
+  }
+  opts.range(splits->front()->splits_start(currentSplitIdx),
+             splits->front()->splits_len(currentSplitIdx));
+  orcReader = orc::createReader(std::move(inputStream), opts);
+
+  if (batch == nullptr) {
+    batch = orcReader->createRowBatch(this->nTuplesPerBatch);
+  }
+}
+
+bool ORCFormatReader::hasSomethingToRead() {
+  if (startAnotherSplit) {
+    startAnotherSplit = false;
+    currentSplitIdx++;
+    while (true) {
+      if (currentSplitIdx >= splits->front()->splits_size()) {
+        return false;
+      }
+      // skip empty split
+      if (splits->front()->splits_len(currentSplitIdx) > 0) {
+        startNewSplit();
+        return true;
+      }
+      currentSplitIdx++;
+    }
+  }
+  return true;
+}
+
+dbcommon::TupleBatch::uptr ORCFormatReader::read() {
+  while (hasSomethingToRead()) {
+    if (batch && orcReader->next(*batch)) {
+      return createTupleBatch(batch.get());
+    } else {
+      startAnotherSplit = true;
+      if (orcReader && opts.getPredicateExprs())
+        orcReader->collectPredicateStats(&scannedStripe, &skippedStripe);
+    }
+  }
+
+  orcReader.reset(nullptr);
+  if (batch) batch.reset(nullptr);
+  return dbcommon::TupleBatch::uptr(nullptr);
+}
+
+void ORCFormatReader::endRead() {
+  if (opts.getPredicateExprs())
+    LOG_INFO("Predicate Info: current qe scan %u stripes, skip %u stripes",
+             scannedStripe, skippedStripe);
+}
+
+void ORCFormatReader::reset() {
+  startAnotherSplit = true;
+  currentSplitIdx = -1;
+  skippedStripe = 0;
+  scannedStripe = 0;
+}
+
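+// Translate an orc::StructVectorBatch into a dbcommon::TupleBatch column by
+// column; when a column carries statistics (stats-only read), the vector gets
+// the statistics plus a single dummy row instead of real data.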
+dbcommon::TupleBatch::uptr ORCFormatReader::createTupleBatch(
+    orc::ColumnVectorBatch *batch) {
+  orc::StructVectorBatch *structBatch =
+      dynamic_cast<orc::StructVectorBatch *>(batch);
+  assert(structBatch != nullptr);
+
+  uint32_t nCols = columnsToRead != nullptr ? columnsToRead->size()
+                                            : structBatch->fields.size();
+  dbcommon::TupleBatch::uptr tbatch(new dbcommon::TupleBatch(nCols));
+
+  std::vector<orc::ColumnVectorBatch *>::iterator it =
+      structBatch->fields.begin();
+
+  tbatch->setNumOfRows(structBatch->numElements);
+
+  for (uint32_t colIdx = 0; colIdx < nCols; colIdx++) {
+    if (columnsToRead && !columnsToRead->at(colIdx)) {
+      continue;
+    }
+
+    orc::ColumnVectorBatch *b = *it++;
+
+    std::unique_ptr<dbcommon::Vector> v;
+    if (b->getType() == orc::ORCTypeKind::LIST) {
+      orc::ListVectorBatch *lb = dynamic_cast<orc::ListVectorBatch *>(b);
+      v = lb->buildVector(lb->elements->getType());
+    } else if (b->getType() == orc::ORCTypeKind::DECIMAL) {
+      v = b->buildVector((opts.getTupleDesc())->getColumnType(colIdx));
+    } else if (b->getType() == orc::ORCTypeKind::TIMESTAMP) {
+      v = b->buildVector((opts.getTupleDesc())->getColumnType(colIdx));
+    } else {
+      v = b->buildVector();
+    }
+
+    if (b->hasStats && b->getType() != orc::ORCTypeKind::TIMESTAMP) {
+      v->setVectorStatistics(b->stats);
+      // append one dummy item so the statistics-only vector has one valid row
+      v->append("1", false);
+      tbatch->setColumn(colIdx, std::move(v), false);
+      tbatch->setNumOfRows(1);
+      continue;
+    }
+
+    switch (b->getType()) {
+      case orc::ORCTypeKind::BOOLEAN:
+      case orc::ORCTypeKind::BYTE:
+      case orc::ORCTypeKind::SHORT:
+      case orc::ORCTypeKind::INT:
+      case orc::ORCTypeKind::LONG:
+      case orc::ORCTypeKind::FLOAT:
+      case orc::ORCTypeKind::DOUBLE:
+      case orc::ORCTypeKind::DATE:
+      case orc::ORCTypeKind::TIME: {
+        v->setValue(b->getData(), b->numElements * b->getWidth());
+        v->setHasNull(b->hasNulls);
+        if (b->hasNulls) v->setNotNulls(b->getNotNull(), b->numElements);
+        assert(v->isValid());
+        break;
+      }
+      case orc::ORCTypeKind::TIMESTAMP: {
+        v->setValue(b->getData(), b->numElements * b->getWidth() / 2);
+        v->setNanoseconds(b->getNanoseconds(),
+                          b->numElements * b->getWidth() / 2);
+        v->setHasNull(b->hasNulls);
+        if (b->hasNulls) v->setNotNulls(b->getNotNull(), b->numElements);
+        assert(v->isValid());
+        break;
+      }
+      case orc::ORCTypeKind::DECIMAL: {
+        assert(dynamic_cast<dbcommon::DecimalVector *>(v.get()));
+        uint64_t count = b->numElements;
+        v->setAuxiliaryValue(b->getAuxiliaryData(),
+                             b->numElements * b->getWidth() / 3);
+        v->setValue(b->getData(), b->numElements * b->getWidth() / 3);
+        v->setScaleValue(b->getScaleData(), b->numElements * b->getWidth() / 3);
+        v->setHasNull(b->hasNulls);
+        if (b->hasNulls) v->setNotNulls(b->getNotNull(), b->numElements);
+        assert(v->isValid());
+        break;
+      }
+      case orc::ORCTypeKind::CHAR:
+      case orc::ORCTypeKind::VARCHAR:
+      case orc::ORCTypeKind::STRING:
+      case orc::ORCTypeKind::BINARY: {
+        orc::BytesVectorBatch *sb = dynamic_cast<orc::BytesVectorBatch *>(b);
+        v->setLengths(reinterpret_cast<uint64_t *>(sb->length.data()),
+                      sb->numElements);
+        // TODO: possible memory leak; clarify when ownership of the value
+        // buffers is transferred
+        v->setValPtrs((const char **)sb->data.data(), sb->numElements);
+        v->setHasNull(b->hasNulls);
+        if (b->hasNulls) v->setNotNulls(b->getNotNull(), b->numElements);
+        reinterpret_cast<dbcommon::StringVector *>(v.get())->setDirectEncoding(
+            sb->isDirectEncoding);
+
+        assert(v->isValid());
+        break;
+      }
+      case orc::ORCTypeKind::LIST: {
+        orc::ListVectorBatch *lb = dynamic_cast<orc::ListVectorBatch *>(b);
+        dbcommon::ListVector *lv =
+            dynamic_cast<dbcommon::ListVector *>(v.get());
+        lv->setOffsets(reinterpret_cast<uint64_t *>(lb->offsets.data()),
+                       lb->numElements + 1);
+        orc::ColumnVectorBatch *clb = lb->elements.get();
+        std::unique_ptr<dbcommon::Vector> clv = clb->buildVector();
+        // Only lists of fixed-length element types are supported for now
+        clv->setValue(clb->getData(), clb->numElements * clb->getWidth());
+        clv->setHasNull(clb->hasNulls);
+        if (clb->hasNulls)
+          clv->setNotNulls(clb->getNotNull(), clb->numElements);
+        assert(clv->isValid());
+        lv->addChildVector(std::move(clv));
+        lv->setHasNull(lb->hasNulls);
+        if (lb->hasNulls) lv->setNotNulls(lb->getNotNull(), lb->numElements);
+        assert(lv->isValid());
+        break;
+      }
+      default:
+        LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED, "type %d not supported yet",
+                  b->getType());
+        break;
+    }
+
+    tbatch->setColumn(colIdx, std::move(v), false);
+  }
+
+  return std::move(tbatch);
+}
+
+}  // namespace storage
diff --git a/depends/storage/src/storage/format/orc/orc-format-reader.h b/depends/storage/src/storage/format/orc/orc-format-reader.h
new file mode 100644
index 0000000..53807f8
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/orc-format-reader.h
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_FORMAT_READER_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_FORMAT_READER_H_
+
+#include <string>
+#include <vector>
+
+#include "dbcommon/common/tuple-batch.h"
+#include "dbcommon/filesystem/file-system-manager.h"
+#include "dbcommon/filesystem/file-system.h"
+#include "dbcommon/utils/byte-buffer.h"
+
+#include "storage/format/format.h"
+#include "storage/format/orc/column-printer.h"
+#include "storage/format/orc/reader.h"
+#include "storage/format/orc/seekable-input-stream.h"
+#include "storage/format/orc/vector.h"
+
+namespace storage {
+
+class ORCFormatReader {
+ public:
+  ORCFormatReader() {}
+  virtual ~ORCFormatReader() {}
+
+  void beginRead(dbcommon::FileSystemManagerInterface *fsManager,
+                 const univplan::UnivPlanScanFileSplitListList *splits,
+                 std::vector<bool> *columnsToRead, uint32_t nTuplesPerBatch,
+                 const univplan::UnivPlanExprPolyList *predicateExprs,
+                 const dbcommon::TupleDesc *td, bool readStatsOnly);
+  dbcommon::TupleBatch::uptr read();
+  void endRead();
+  void reset();
+
+ private:
+  void startNewSplit();
+  dbcommon::TupleBatch::uptr createTupleBatch(orc::ColumnVectorBatch *batch);
+  bool hasSomethingToRead();
+
+ private:
+  const univplan::UnivPlanScanFileSplitListList *splits = nullptr;
+  std::vector<bool> *columnsToRead = nullptr;
+  dbcommon::FileSystemManagerInterface *fsManager = nullptr;
+  uint32_t nTuplesPerBatch = Format::kTuplesPerBatch;
+
+  std::unique_ptr<orc::Reader> orcReader;
+  orc::ReaderOptions opts;
+  std::unique_ptr<orc::ColumnVectorBatch> batch;
+  bool startAnotherSplit = true;
+  int32_t currentSplitIdx = -1;
+
+  // counters for filter push-down statistics
+  uint32_t skippedStripe = 0;
+  uint32_t scannedStripe = 0;
+};
+
+}  // namespace storage
+
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_FORMAT_READER_H_
diff --git a/depends/storage/src/storage/format/orc/orc-format-writer.cc b/depends/storage/src/storage/format/orc/orc-format-writer.cc
new file mode 100644
index 0000000..c5fd10d
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/orc-format-writer.cc
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "json/json.h"
+
+// #include "kv/common/cn-global.h"
+// #include "kv/common/configuration.h"
+#include "dbcommon/utils/parameters.h"
+#include "dbcommon/utils/url.h"
+#include "storage/format/orc/orc-format-writer.h"
+
+namespace storage {
+ORCFormatWriter::ORCFormatWriter(
+    dbcommon::FileSystemManagerInterface *fsManager, dbcommon::TupleDesc *td,
+    const char *fileName, uint32_t blockAlignSize, dbcommon::Parameters *p) {
+  this->fsManager = fsManager;
+  this->fileName = fileName;
+
+  dbcommon::URL url(fileName);
+  this->fileSystem = fsManager->get(url.getNormalizedServiceName());
+
+  std::unique_ptr<orc::Type> schema = buildSchema(td);
+  opts.setSchema(std::move(schema));
+  opts.setBlockSize(blockAlignSize);
+
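+  // Table options arrive as a JSON string and are mapped onto the ORC writer
+  // options (compression, RLE version, dictionary threshold, bloom filter
+  // columns, statistics).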
+  assert(p != nullptr);
+  std::string tableOptionStr = p->get("table.options", "");
+  Json::Reader reader;
+  Json::Value root;
+  if (!reader.parse(tableOptionStr, root))
+    LOG_ERROR(ERRCODE_INTERNAL_ERROR, "jsoncpp failed to parse \'%s\'",
+              tableOptionStr.c_str());
+  if (root.isMember("compresstype"))
+    opts.setCompressionKind(root["compresstype"].asCString());
+  if (root.isMember("rlecoder"))
+    opts.setRleVersion(root["rlecoder"].asCString());
+  if (root.isMember("dicthreshold"))
+    opts.setDictKeySizeThreshold(atof(root["dicthreshold"].asCString()));
+  if (root.isMember("bloomfilter")) {
+    std::vector<int> columns;
+    int col_size = root["bloomfilter"].size();
+    for (int i = 0; i < col_size; ++i) {
+      columns.push_back(root["bloomfilter"][i].asInt());
+    }
+    opts.setColumnsToBloomFilter(columns, td->getNumOfColumns());
+  }
+  if (root.isMember("writestats"))
+    opts.setWriteStats(root["writestats"].asBool());
+
+  writer = orc::createWriter(orc::writeFile(fileSystem, url.getPath()), &opts);
+}
+
+void ORCFormatWriter::beginWrite() { writer->begin(); }
+
+void ORCFormatWriter::write(dbcommon::TupleBatch *tb) {
+  writer->addTupleBatch(tb);
+}
+
+void ORCFormatWriter::endWrite() { writer->end(); }
+
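+// Build the ORC struct schema from the tuple descriptor: each table column
+// becomes one struct field; array columns become LIST types with a single
+// element child.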
+std::unique_ptr<orc::Type> ORCFormatWriter::buildSchema(
+    dbcommon::TupleDesc *td) {
+  assert(td != nullptr);
+
+  std::vector<dbcommon::TypeKind> &types = td->getColumnTypes();
+  std::vector<std::string> &colNames = td->getColumnNames();
+  std::vector<int64_t> &colTypeMod = td->getColumnTypeModifiers();
+
+  std::unique_ptr<orc::Type> ret(new orc::TypeImpl(orc::ORCTypeKind::STRUCT));
+
+  for (uint32_t i = 0; i < types.size(); i++) {
+    dbcommon::TypeKind t = types[i];
+    std::string &name = colNames[i];
+    int64_t typeMod = colTypeMod[i];
+    std::unique_ptr<orc::Type> child;
+    std::unique_ptr<orc::Type> grandchild;
+
+    switch (t) {
+      case dbcommon::TypeKind::TINYINTID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::BYTE));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::SMALLINTID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::SHORT));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::INTID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::INT));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::BIGINTID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::LONG));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::FLOATID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::FLOAT));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::DOUBLEID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::DOUBLE));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::STRINGID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::STRING));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::VARCHARID:
+        child.reset(
+            new orc::TypeImpl(orc::ORCTypeKind::VARCHAR,
+                              dbcommon::TypeModifierUtil::getMaxLen(typeMod)));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::CHARID:
+        child.reset(
+            new orc::TypeImpl(orc::ORCTypeKind::CHAR,
+                              dbcommon::TypeModifierUtil::getMaxLen(typeMod)));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::BOOLEANID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::BOOLEAN));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::DATEID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::DATE));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::TIMEID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::TIME));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::BINARYID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::BINARY));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::TIMESTAMPID:
+      case dbcommon::TypeKind::TIMESTAMPTZID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::TIMESTAMP));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::DECIMALID:
+      case dbcommon::TypeKind::DECIMALNEWID:
+        child.reset(
+            new orc::TypeImpl(orc::ORCTypeKind::DECIMAL,
+                              dbcommon::TypeModifierUtil::getPrecision(typeMod),
+                              dbcommon::TypeModifierUtil::getScale(typeMod)));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::SMALLINTARRAYID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::LIST));
+        grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::SHORT));
+        child->addStructField(name, std::move(grandchild));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::INTARRAYID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::LIST));
+        grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::INT));
+        child->addStructField(name, std::move(grandchild));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::BIGINTARRAYID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::LIST));
+        grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::LONG));
+        child->addStructField(name, std::move(grandchild));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::FLOATARRAYID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::LIST));
+        grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::FLOAT));
+        child->addStructField(name, std::move(grandchild));
+        ret->addStructField(name, std::move(child));
+        break;
+      case dbcommon::TypeKind::DOUBLEARRAYID:
+        child.reset(new orc::TypeImpl(orc::ORCTypeKind::LIST));
+        grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::DOUBLE));
+        child->addStructField(name, std::move(grandchild));
+        ret->addStructField(name, std::move(child));
+        break;
+      default:
+        LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED,
+                  "type not supported for orc: %d", t);
+    }
+  }
+
+  ret->assignIds(0);
+  return std::move(ret);
+}
+
+}  // namespace storage
diff --git a/depends/storage/src/storage/format/orc/orc-format-writer.h b/depends/storage/src/storage/format/orc/orc-format-writer.h
new file mode 100644
index 0000000..0cd23aa
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/orc-format-writer.h
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_FORMAT_WRITER_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_FORMAT_WRITER_H_
+
+#include <string>
+
+#include "dbcommon/common/tuple-batch.h"
+#include "dbcommon/common/tuple-desc.h"
+#include "dbcommon/filesystem/file-system-manager.h"
+#include "dbcommon/filesystem/file-system.h"
+#include "storage/format/orc/writer.h"
+
+namespace storage {
+
+class Parameters;
+class TupleBatch;
+class ORCFormatWriter {
+ public:
+  ORCFormatWriter(dbcommon::FileSystemManagerInterface* fsManager,
+                  dbcommon::TupleDesc* td, const char* fileName,
+                  uint32_t blockAlignSize, dbcommon::Parameters* p);
+
+  virtual ~ORCFormatWriter() {}
+
+  void beginWrite();
+
+  void write(dbcommon::TupleBatch* tb);
+
+  void endWrite();
+
+ private:
+  std::unique_ptr<orc::Type> buildSchema(dbcommon::TupleDesc* td);
+
+ private:
+  dbcommon::FileSystemManagerInterface* fsManager = nullptr;
+  dbcommon::FileSystem* fileSystem = nullptr;
+  std::string fileName;
+  dbcommon::TupleDesc* desc = nullptr;
+
+  orc::WriterOptions opts;
+  std::unique_ptr<orc::Writer> writer;
+};
+
+}  // namespace storage
+
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_FORMAT_WRITER_H_
diff --git a/depends/storage/src/storage/format/orc/orc-format.cc b/depends/storage/src/storage/format/orc/orc-format.cc
new file mode 100644
index 0000000..1a5d668
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/orc-format.cc
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "storage/format/orc/orc-format.h"
+
+#include <memory>
+#include <utility>
+
+#include "dbcommon/log/logger.h"
+
+namespace storage {
+
+void ORCFormat::beginInsert(const std::string &targetName,
+                            const dbcommon::TupleDesc &tupleDesc) {
+  assert(!targetName.empty());
+  assert(params != nullptr);
+
+  writer.reset(new ORCFormatWriter(
+      this->fsManager, const_cast<dbcommon::TupleDesc *>(&tupleDesc),
+      targetName.c_str(), this->blockAlignSize, params));
+  writer->beginWrite();
+}
+
+void ORCFormat::doInsert(std::unique_ptr<dbcommon::TupleBatch> tb) {
+  writer->write(tb.get());
+}
+
+void ORCFormat::endInsert() { writer->endWrite(); }
+
+void ORCFormat::beginUpdate(const std::string &targetName,
+                            const dbcommon::TupleDesc &td) {
+  LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED, "UPDATE is not implemented yet");
+}
+
+void ORCFormat::doUpdate(std::unique_ptr<dbcommon::TupleBatch> tb) {
+  LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED, "UPDATE is not implemented yet");
+}
+
+void ORCFormat::endUpdate() {
+  LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED, "UPDATE is not implemented yet");
+}
+
+void ORCFormat::beginDelete(const std::string &targetName,
+                            const dbcommon::TupleDesc &td) {
+  LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED, "DELETE is not implemented yet");
+}
+
+void ORCFormat::doDelete(std::unique_ptr<dbcommon::TupleBatch> tb) {
+  LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED, "DELETE is not implemented yet");
+}
+
+void ORCFormat::endDelete() {
+  LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED, "DELETE is not implemented yet");
+}
+
+void ORCFormat::beginScan(const univplan::UnivPlanScanFileSplitListList *splits,
+                          const dbcommon::TupleDesc *tupleDesc,
+                          const std::vector<bool> *projectionCols,
+                          const univplan::UnivPlanExprPolyList *filterExprs,
+                          const FormatContext *formatContext,
+                          bool readStatsOnly) {
+  this->splits = splits;
+  if (this->splits != nullptr) {
+    assert(tupleDesc != nullptr);
+    assert(params != nullptr);
+    std::string tableOptionStr = params->get("table.options", "");
+    assert(!tableOptionStr.empty());
+    Json::Reader jreader;
+    Json::Value root;
+    if (!jreader.parse(tableOptionStr, root))
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "jsoncpp failed to parse \'%s\'",
+                tableOptionStr.c_str());
+
+    reader.reset(new ORCFormatReader());
+
+    assert(fsManager != nullptr);
+
+    reader->beginRead(fsManager, splits,
+                      const_cast<std::vector<bool> *>(projectionCols),
+                      this->nTuplesPerBatch,
+                      const_cast<univplan::UnivPlanExprPolyList *>(filterExprs),
+                      tupleDesc, readStatsOnly);
+  }
+}
+
+dbcommon::TupleBatch::uptr ORCFormat::next() {
+  if (splits != nullptr) {
+    dbcommon::TupleBatch::uptr result = reader->read();
+    assert(!result || result->isValid());
+    return std::move(result);
+  }
+
+  return dbcommon::TupleBatch::uptr();
+}
+
+void ORCFormat::endScan() {
+  if (splits != nullptr) {
+    reader->endRead();
+  }
+}
+
+void ORCFormat::reScan() {
+  if (this->splits != nullptr) {
+    assert(reader != nullptr);
+    reader->reset();
+  }
+}
+
+void ORCFormat::stopScan() {
+  LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED, "stopScan is not implemented yet");
+}
+
+}  // namespace storage
diff --git a/depends/storage/src/storage/format/orc/orc-format.h b/depends/storage/src/storage/format/orc/orc-format.h
new file mode 100644
index 0000000..6dd3632
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/orc-format.h
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_FORMAT_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_FORMAT_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "json/json.h"
+
+#include "dbcommon/utils/int-util.h"
+#include "storage/format/format.h"
+#include "storage/format/orc/orc-format-reader.h"
+#include "storage/format/orc/orc-format-writer.h"
+
+namespace storage {
+
+//
+// ORCFormat
+//
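+// Implements the Format interface for ORC files: beginScan()/next()/endScan()
+// drive reads over univplan file splits with optional projection and
+// predicate push-down, while beginInsert()/doInsert()/endInsert() drive
+// writes. UPDATE and DELETE are not supported yet.
+//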
+class ORCFormat : public Format {
+ public:
+  ORCFormat() {}
+
+  // constructor with input parameters map
+  explicit ORCFormat(dbcommon::Parameters *params) : params(params) {
+    if (params != nullptr) {
+      this->blockAlignSize =
+          params->getAsInt32("format.block.align.size", Format::kBlockSize);
+
+      if (!dbcommon::isPowerOfTwo(this->blockAlignSize)) {
+        LOG_ERROR(ERRCODE_INVALID_PARAMETER_VALUE,
+                  "for ORCFormat, "
+                  "format.block.align.size can only be power of 2, input is %d",
+                  this->blockAlignSize);
+      }
+
+      this->nTuplesPerBatch = params->getAsInt32("number.tuples.per.batch",
+                                                 Format::kTuplesPerBatch);
+      if (this->nTuplesPerBatch % 8 != 0)
+        LOG_ERROR(
+            ERRCODE_INVALID_PARAMETER_VALUE,
+            "for ORCFormat, "
+            "number.tuples.per.batch can only be multiples of 8, input is %d",
+            this->nTuplesPerBatch);
+    }
+  }
+
+  virtual ~ORCFormat() {}
+
+  ORCFormat(ORCFormat &&format) = delete;
+  ORCFormat(const ORCFormat &format) = delete;
+  ORCFormat &operator=(const ORCFormat &format) = delete;
+  ORCFormat &operator=(ORCFormat &&format) = delete;
+
+  void beginScan(const univplan::UnivPlanScanFileSplitListList *splits,
+                 const dbcommon::TupleDesc *tupleDesc,
+                 const std::vector<bool> *projectionCols,
+                 const univplan::UnivPlanExprPolyList *filterExpr,
+                 const FormatContext *formatContext,
+                 bool readStatsOnly) override;
+
+  dbcommon::TupleBatch::uptr next() override;
+  void endScan() override;
+  void reScan() override;
+  void stopScan() override;
+
+  void beginInsert(const std::string &targetName,
+                   const dbcommon::TupleDesc &td) override;
+  void doInsert(std::unique_ptr<dbcommon::TupleBatch> tb) override;
+  void endInsert() override;
+
+  void beginUpdate(const std::string &targetName,
+                   const dbcommon::TupleDesc &td) override;
+  void doUpdate(std::unique_ptr<dbcommon::TupleBatch> tb) override;
+  void endUpdate() override;
+
+  void beginDelete(const std::string &targetName,
+                   const dbcommon::TupleDesc &td) override;
+  void doDelete(std::unique_ptr<dbcommon::TupleBatch> tb) override;
+  void endDelete() override;
+
+ private:
+  std::unique_ptr<ORCFormatWriter> writer;
+  std::unique_ptr<ORCFormatReader> reader;
+
+  uint32_t blockAlignSize = Format::kBlockSize;
+  uint32_t nTuplesPerBatch = Format::kTuplesPerBatch;
+
+  dbcommon::Parameters *params = nullptr;
+};
+
+}  // namespace storage
+
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_FORMAT_H_
diff --git a/depends/storage/src/storage/format/orc/orc-predicates.cc b/depends/storage/src/storage/format/orc/orc-predicates.cc
new file mode 100644
index 0000000..e5ef1b6
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/orc-predicates.cc
@@ -0,0 +1,281 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "storage/format/orc/orc-predicates.h"
+
+#include <memory>
+#include <sstream>
+#include <utility>
+
+#include "dbcommon/log/logger.h"
+#include "dbcommon/type/decimal.h"
+#include "dbcommon/utils/string-util.h"
+#include "univplan/common/plannode-walker.h"
+
+#include "storage/common/bloom-filter.h"
+#include "storage/format/orc/reader.h"
+
+namespace orc {
+
+bool OrcPredicates::hasAllNull(int32_t colId) const {
+  disableInvalidColId(colId);
+  const Type& child = *reader->getType().getSubtype(colId - 1);
+  return stripeStats->getColumnStatistics(child.getColumnId())
+                 ->getNumberOfValues() == 0 &&
+         stripeStats->getColumnStatistics(child.getColumnId())->hasNull();
+}
+
+bool OrcPredicates::hasNull(int32_t colId) const {
+  disableInvalidColId(colId);
+  const Type& child = *reader->getType().getSubtype(colId - 1);
+  return stripeStats->getColumnStatistics(child.getColumnId())->hasNull();
+}
+
+univplan::PredicateStats OrcPredicates::getMinMax(int32_t colId) const {
+  disableInvalidColId(colId);
+  dbcommon::Timestamp ts1, ts2;
+  return getMinMax(colId, &ts1, &ts2);
+}
+
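+// Read min/max from the stripe-level column statistics and convert them to
+// the column's dbcommon type; timestamp bounds are returned through the out
+// parameters, with the stored milliseconds split into seconds and nanoseconds.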
+univplan::PredicateStats OrcPredicates::getMinMax(
+    int32_t colId, dbcommon::Timestamp* minTimestamp,
+    dbcommon::Timestamp* maxTimestamp) const {
+  disableInvalidColId(colId);
+  const Type& child = *reader->getType().getSubtype(colId - 1);
+  const univplan::ColumnStatistics* stats =
+      stripeStats->getColumnStatistics(child.getColumnId());
+  dbcommon::TypeKind type = td->getColumnType(colId - 1);
+  univplan::PredicateStats ret;
+  ret.hasMinMax = true;
+  switch (type) {
+    case dbcommon::TypeKind::SMALLINTID:
+    case dbcommon::TypeKind::INTID:
+    case dbcommon::TypeKind::BIGINTID:
+    case dbcommon::TypeKind::TIMEID: {
+      const IntegerColumnStatisticsImpl* iStat =
+          dynamic_cast<const IntegerColumnStatisticsImpl*>(stats);
+      if (type == dbcommon::TypeKind::SMALLINTID) {
+        ret.minValue = dbcommon::Scalar(
+            dbcommon::CreateDatum(static_cast<int16_t>(iStat->getMinimum())),
+            false);
+        ret.maxValue = dbcommon::Scalar(
+            dbcommon::CreateDatum(static_cast<int16_t>(iStat->getMaximum())),
+            false);
+      } else if (type == dbcommon::TypeKind::INTID ||
+                 type == dbcommon::TypeKind::DATEID) {
+        ret.minValue = dbcommon::Scalar(
+            dbcommon::CreateDatum(static_cast<int32_t>(iStat->getMinimum())),
+            false);
+        ret.maxValue = dbcommon::Scalar(
+            dbcommon::CreateDatum(static_cast<int32_t>(iStat->getMaximum())),
+            false);
+      } else {
+        ret.minValue =
+            dbcommon::Scalar(dbcommon::CreateDatum(iStat->getMinimum()), false);
+        ret.maxValue =
+            dbcommon::Scalar(dbcommon::CreateDatum(iStat->getMaximum()), false);
+      }
+      break;
+    }
+    case dbcommon::TypeKind::FLOATID:
+    case dbcommon::TypeKind::DOUBLEID: {
+      const DoubleColumnStatisticsImpl* dStat =
+          dynamic_cast<const DoubleColumnStatisticsImpl*>(stats);
+      if (type == dbcommon::TypeKind::FLOATID) {
+        ret.minValue = dbcommon::Scalar(
+            dbcommon::CreateDatum(static_cast<float>(dStat->getMinimum())),
+            false);
+        ret.maxValue = dbcommon::Scalar(
+            dbcommon::CreateDatum(static_cast<float>(dStat->getMaximum())),
+            false);
+      } else {
+        ret.minValue =
+            dbcommon::Scalar(dbcommon::CreateDatum(dStat->getMinimum()), false);
+        ret.maxValue =
+            dbcommon::Scalar(dbcommon::CreateDatum(dStat->getMaximum()), false);
+      }
+      break;
+    }
+    case dbcommon::TypeKind::CHARID: {
+      // trailing spaces must be trimmed for CHAR(n) statistics
+      const StringColumnStatisticsImpl* sStat =
+          dynamic_cast<const StringColumnStatisticsImpl*>(stats);
+      ret.minValue =
+          dbcommon::Scalar(dbcommon::CreateDatum(sStat->getMinimum()), false);
+      const char* s = sStat->getMinimum();
+      uint32_t len = strlen(s);
+      while (len != 0 && s[len - 1] == ' ') --len;
+      ret.minValue.length = len;
+      ret.maxValue =
+          dbcommon::Scalar(dbcommon::CreateDatum(sStat->getMaximum()), false);
+      s = sStat->getMaximum();
+      len = strlen(s);
+      while (len != 0 && s[len - 1] == ' ') --len;
+      ret.maxValue.length = len;
+      break;
+    }
+    case dbcommon::TypeKind::VARCHARID:
+    case dbcommon::TypeKind::STRINGID: {
+      const StringColumnStatisticsImpl* sStat =
+          dynamic_cast<const StringColumnStatisticsImpl*>(stats);
+      ret.minValue =
+          dbcommon::Scalar(dbcommon::CreateDatum(sStat->getMinimum()), false);
+      ret.minValue.length = strlen(sStat->getMinimum());
+      ret.maxValue =
+          dbcommon::Scalar(dbcommon::CreateDatum(sStat->getMaximum()), false);
+      ret.maxValue.length = strlen(sStat->getMaximum());
+      break;
+    }
+    case dbcommon::TypeKind::BOOLEANID: {
+      const BooleanColumnStatisticsImpl* bStat =
+          dynamic_cast<const BooleanColumnStatisticsImpl*>(stats);
+      ret.minValue = dbcommon::Scalar(
+          dbcommon::CreateDatum(bStat->getFalseCount() == 0), false);
+      ret.maxValue = dbcommon::Scalar(
+          dbcommon::CreateDatum(bStat->getTrueCount() > 0), false);
+      break;
+    }
+    case dbcommon::TypeKind::DATEID: {
+      const DateColumnStatisticsImpl* dStat =
+          dynamic_cast<const DateColumnStatisticsImpl*>(stats);
+      ret.minValue = dbcommon::Scalar(
+          dbcommon::CreateDatum(static_cast<int32_t>(dStat->getMinimum())),
+          false);
+      ret.maxValue = dbcommon::Scalar(
+          dbcommon::CreateDatum(static_cast<int32_t>(dStat->getMaximum())),
+          false);
+      break;
+    }
+    case dbcommon::TypeKind::TIMESTAMPID:
+    case dbcommon::TypeKind::TIMESTAMPTZID: {
+      const TimestampColumnStatisticsImpl* tStat =
+          dynamic_cast<const TimestampColumnStatisticsImpl*>(stats);
+      minTimestamp->second = tStat->getMinimum() / 1000;
+      minTimestamp->nanosecond = (tStat->getMinimum() % 1000) * 1000000;
+      maxTimestamp->second = tStat->getMaximum() / 1000;
+      maxTimestamp->nanosecond =
+          (tStat->getMaximum() % 1000) * 1000000 + 999999;
+      ret.minValue =
+          dbcommon::Scalar(dbcommon::CreateDatum(minTimestamp), false);
+      ret.minValue.length = sizeof(dbcommon::Timestamp);
+      ret.maxValue =
+          dbcommon::Scalar(dbcommon::CreateDatum(maxTimestamp), false);
+      ret.maxValue.length = sizeof(dbcommon::Timestamp);
+      break;
+    }
+    case dbcommon::TypeKind::DECIMALID: {
+      const DecimalColumnStatisticsImpl* dStat =
+          dynamic_cast<const DecimalColumnStatisticsImpl*>(stats);
+      ret.minValue = dbcommon::Scalar(
+          dbcommon::CreateDatum(dStat->getMinimumStr()), false);
+      ret.minValue.length = sizeof(dbcommon::DecimalVar);
+      ret.maxValue = dbcommon::Scalar(
+          dbcommon::CreateDatum(dStat->getMaximumStr()), false);
+      ret.maxValue.length = sizeof(dbcommon::DecimalVar);
+      break;
+    }
+    default: {
+      ret.hasMinMax = false;
+    }
+  }
+  return ret;
+}
+
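+// Rebuild the column's bloom filter index and probe it with the predicate
+// value; the stripe can be dropped only if no bloom filter possibly contains
+// the value, while a single positive test keeps the stripe.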
+bool OrcPredicates::canDropByBloomFilter(int32_t colId,
+                                         univplan::PredicateStats* stat,
+                                         dbcommon::TypeKind type) const {
+  disableInvalidColId(colId);
+  const Type& child = *reader->getType().getSubtype(colId - 1);
+  proto::BloomFilterIndex bloomFilterIndexProto =
+      reader->rebuildBloomFilter(child.getColumnId());
+  if (bloomFilterIndexProto.bloomfilter_size() == 0) return false;
+
+  for (int32_t i = 0; i < bloomFilterIndexProto.bloomfilter_size(); ++i) {
+    const proto::BloomFilter& bloomFilterProto =
+        bloomFilterIndexProto.bloomfilter(i);
+    std::vector<uint64_t> data;
+    for (int32_t j = 0; j < bloomFilterProto.bitset_size(); ++j)
+      data.push_back(bloomFilterProto.bitset(j));
+    storage::BloomFilter::uptr bf(new storage::BloomFilter(
+        data.data(), data.size(), bloomFilterProto.numhashfunctions()));
+    switch (type) {
+      case dbcommon::TypeKind::SMALLINTID: {
+        if (bf->testInt(dbcommon::DatumGetValue<int16_t>(stat->maxValue.value)))
+          return false;
+        break;
+      }
+      case dbcommon::TypeKind::INTID:
+      case dbcommon::TypeKind::DATEID: {
+        if (bf->testInt(dbcommon::DatumGetValue<int32_t>(stat->maxValue.value)))
+          return false;
+        break;
+      }
+      case dbcommon::TypeKind::BIGINTID:
+      case dbcommon::TypeKind::TIMEID: {
+        if (bf->testInt(dbcommon::DatumGetValue<int64_t>(stat->maxValue.value)))
+          return false;
+        break;
+      }
+      case dbcommon::TypeKind::FLOATID: {
+        if (bf->testDouble(
+                dbcommon::DatumGetValue<float>(stat->maxValue.value)))
+          return false;
+        break;
+      }
+      case dbcommon::TypeKind::DOUBLEID: {
+        if (bf->testDouble(
+                dbcommon::DatumGetValue<double>(stat->maxValue.value)))
+          return false;
+        break;
+      }
+      case dbcommon::TypeKind::CHARID:
+      case dbcommon::TypeKind::VARCHARID:
+      case dbcommon::TypeKind::STRINGID: {
+        const char* str =
+            dbcommon::DatumGetValue<const char*>(stat->maxValue.value);
+        if (bf->testString(str, strlen(str))) return false;
+        break;
+      }
+      case dbcommon::TypeKind::TIMESTAMPID:
+      case dbcommon::TypeKind::TIMESTAMPTZID: {
+        dbcommon::Timestamp* ts =
+            dbcommon::DatumGetValue<dbcommon::Timestamp*>(stat->maxValue.value);
+        if (bf->testInt(ts->second * 1000 + ts->nanosecond / 1000000))
+          return false;
+        break;
+      }
+      default: {
+        LOG_ERROR(
+            ERRCODE_FEATURE_NOT_SUPPORTED,
+            "not supported type %d in OrcPredicates::canDropByBloomFilter",
+            type);
+      }
+    }
+  }
+
+  return true;
+}
+
+void OrcPredicates::disableInvalidColId(int32_t colId) const {
+  if (colId < 0)
+    LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED,
+              "hidden column doesn't support predicate");
+}
+
+}  // end of namespace orc
diff --git a/depends/storage/src/storage/format/orc/orc-predicates.h b/depends/storage/src/storage/format/orc/orc-predicates.h
new file mode 100644
index 0000000..0da9029
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/orc-predicates.h
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_PREDICATES_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_PREDICATES_H_
+
+#include <string>
+#include <vector>
+
+#include "dbcommon/common/tuple-batch.h"
+#include "dbcommon/common/tuple-desc.h"
+#include "dbcommon/nodes/datum.h"
+
+#include "storage/format/orc/orc-proto-definition.h"
+
+#include "univplan/common/expression.h"
+#include "univplan/common/univplan-type.h"
+#include "univplan/common/var-util.h"
+#include "univplan/minmax/minmax-predicates.h"
+
+namespace orc {
+
+class ReaderImpl;
+class OrcPredicates : public univplan::MinMaxPredicatesPage {
+ public:
+  OrcPredicates(const univplan::Statistics* s, ReaderImpl* r,
+                const univplan::UnivPlanExprPolyList* predicateExprs,
+                const dbcommon::TupleDesc* tupleDesc)
+      : univplan::MinMaxPredicatesPage(s, predicateExprs, tupleDesc),
+        reader(r) {}
+  virtual ~OrcPredicates() {}
+
+  typedef std::unique_ptr<OrcPredicates> uptr;
+
+ public:
+  virtual bool hasNull(int32_t colId) const;
+  virtual bool hasAllNull(int32_t colId) const;
+  virtual bool canDropByBloomFilter(int32_t colId,
+                                    univplan::PredicateStats* stat,
+                                    dbcommon::TypeKind type) const;
+  virtual univplan::PredicateStats getMinMax(int32_t colId) const;
+  virtual univplan::PredicateStats getMinMax(
+      int32_t colId, dbcommon::Timestamp* minTimestamp,
+      dbcommon::Timestamp* maxTimestamp) const;
+
+ private:
+  void disableInvalidColId(int32_t colId) const;
+
+ private:
+  ReaderImpl* reader;
+};
+
+}  // end of namespace orc
+
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_PREDICATES_H_
diff --git a/depends/storage/src/storage/format/orc/orc-proto-definition.cc b/depends/storage/src/storage/format/orc/orc-proto-definition.cc
new file mode 100644
index 0000000..9c6caa1
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/orc-proto-definition.cc
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "storage/format/orc/orc-proto-definition.h"
+#include "storage/format/orc/seekable-input-stream.h"
+
+namespace orc {
+
+StreamInformation::~StreamInformation() {
+  // PASS
+}
+
+StripeInformation::~StripeInformation() {
+  // PASS
+}
+
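+// Lazily read, decompress, and parse the stripe footer on first use.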
+void StripeInformationImpl::ensureStripeFooterLoaded() const {
+  if (stripeFooter.get() == nullptr) {
+    std::unique_ptr<SeekableInputStream> pbStream = createDecompressor(
+        compression,
+        std::unique_ptr<SeekableInputStream>(new SeekableFileInputStream(
+            stream, offset + indexLength + dataLength, footerLength,
+            memoryPool)),
+        blockSize, memoryPool);
+    stripeFooter.reset(new proto::StripeFooter());
+    if (!stripeFooter->ParseFromZeroCopyStream(pbStream.get())) {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Failed to parse the stripe footer");
+    }
+  }
+}
+
+std::unique_ptr<StreamInformation> StripeInformationImpl::getStreamInformation(
+    uint64_t streamId) const {
+  ensureStripeFooterLoaded();
+  uint64_t streamOffset = offset;
+  for (uint64_t s = 0; s < streamId; ++s) {
+    streamOffset += stripeFooter->streams(static_cast<int>(s)).length();
+  }
+  return std::unique_ptr<StreamInformation>(new StreamInformationImpl(
+      streamOffset, stripeFooter->streams(static_cast<int>(streamId))));
+}
+
+StatisticsImpl::~StatisticsImpl() {
+  for (std::list<univplan::ColumnStatistics*>::iterator ptr = colStats.begin();
+       ptr != colStats.end(); ++ptr) {
+    delete *ptr;
+  }
+}
+
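+// Instantiate the typed statistics wrapper that matches the statistics
+// carried by the protobuf message.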
+univplan::ColumnStatistics* convertColumnStatistics(
+    const proto::ColumnStatistics& s, bool correctStats) {
+  if (s.has_intstatistics()) {
+    return new IntegerColumnStatisticsImpl(s);
+  } else if (s.has_doublestatistics()) {
+    return new DoubleColumnStatisticsImpl(s);
+  } else if (s.has_stringstatistics()) {
+    return new StringColumnStatisticsImpl(s, correctStats);
+  } else if (s.has_bucketstatistics()) {
+    return new BooleanColumnStatisticsImpl(s, correctStats);
+  } else if (s.has_decimalstatistics()) {
+    return new DecimalColumnStatisticsImpl(s, correctStats);
+  } else if (s.has_timestampstatistics()) {
+    return new TimestampColumnStatisticsImpl(s, correctStats);
+  } else if (s.has_datestatistics()) {
+    return new DateColumnStatisticsImpl(s, correctStats);
+  } else if (s.has_binarystatistics()) {
+    return new BinaryColumnStatisticsImpl(s, correctStats);
+  } else {
+    return new ColumnStatisticsImpl(s);
+  }
+}
+
+// TODO(zhenglin): complete support for the remaining types
+std::unique_ptr<ColumnStatisticsImpl> createColumnStatistics(
+    const orc::Type* type) {
+  switch (type->getKind()) {
+    case orc::ORCTypeKind::BYTE:
+    case orc::ORCTypeKind::SHORT:
+    case orc::ORCTypeKind::INT:
+    case orc::ORCTypeKind::LONG:
+    case orc::ORCTypeKind::TIME:
+      return std::unique_ptr<ColumnStatisticsImpl>(
+          new IntegerColumnStatisticsImpl());
+    case orc::ORCTypeKind::FLOAT:
+    case orc::ORCTypeKind::DOUBLE:
+      return std::unique_ptr<ColumnStatisticsImpl>(
+          new DoubleColumnStatisticsImpl());
+    case orc::ORCTypeKind::STRING:
+    case orc::ORCTypeKind::VARCHAR:
+    case orc::ORCTypeKind::CHAR:
+      return std::unique_ptr<ColumnStatisticsImpl>(
+          new StringColumnStatisticsImpl());
+    case orc::ORCTypeKind::BOOLEAN:
+      return std::unique_ptr<ColumnStatisticsImpl>(
+          new BooleanColumnStatisticsImpl());
+    case orc::ORCTypeKind::BINARY:
+      return std::unique_ptr<ColumnStatisticsImpl>(
+          new BinaryColumnStatisticsImpl());
+    case orc::ORCTypeKind::DATE:
+      return std::unique_ptr<ColumnStatisticsImpl>(
+          new DateColumnStatisticsImpl());
+    case orc::ORCTypeKind::TIMESTAMP:
+      return std::unique_ptr<ColumnStatisticsImpl>(
+          new TimestampColumnStatisticsImpl());
+    case orc::ORCTypeKind::DECIMAL:
+      return std::unique_ptr<ColumnStatisticsImpl>(
+          new DecimalColumnStatisticsImpl());
+    default:
+      return std::unique_ptr<ColumnStatisticsImpl>(new ColumnStatisticsImpl());
+  }
+}
+
+DateColumnStatisticsImpl::~DateColumnStatisticsImpl() {
+  // PASS
+}
+
+DecimalColumnStatisticsImpl::~DecimalColumnStatisticsImpl() {
+  // PASS
+}
+
+TimestampColumnStatisticsImpl::~TimestampColumnStatisticsImpl() {
+  // PASS
+}
+
+DateColumnStatisticsImpl::DateColumnStatisticsImpl(
+    const proto::ColumnStatistics& pb, bool correctStats) {
+  valueCount = pb.numberofvalues();
+  hasNullValue = pb.hasnull();
+  if (!pb.has_datestatistics() || !correctStats) {
+    _hasStats = false;
+
+    minimum = 0;
+    maximum = 0;
+  } else {
+    _hasStats = pb.datestatistics().has_minimum();
+    minimum = pb.datestatistics().minimum();
+    maximum = pb.datestatistics().maximum();
+  }
+}
+
+DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl(
+    const proto::ColumnStatistics& pb, bool correctStats) {
+  valueCount = pb.numberofvalues();
+  hasNullValue = pb.hasnull();
+  if (!pb.has_decimalstatistics() || !correctStats) {
+    _hasMinimum = false;
+    _hasMaximum = false;
+    _hasSum = false;
+  } else {
+    const proto::DecimalStatistics& stats = pb.decimalstatistics();
+    _hasMinimum = stats.has_minimum();
+    _hasMaximum = stats.has_maximum();
+    _hasSum = stats.has_sum();
+
+    minimum = stats.minimum();
+    maximum = stats.maximum();
+    sum = stats.sum();
+  }
+}
+
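+// Add a decimal value to the running sum: align the two scales first, then
+// add; the sum is marked unavailable if scaling or the addition overflows.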
+void DecimalColumnStatisticsImpl::updateSum(Decimal value) {
+  bool overflow = false;
+  Decimal currentSum = this->getSum();
+
+  if (currentSum.scale > value.scale) {
+    value.value = scaleUpInt128ByPowerOfTen(
+        value.value, currentSum.scale - value.scale, overflow);
+  } else if (currentSum.scale < value.scale) {
+    currentSum.value = scaleUpInt128ByPowerOfTen(
+        currentSum.value, value.scale - currentSum.scale, overflow);
+    currentSum.scale = value.scale;
+  }
+
+  if (!overflow) {
+    bool wasPositive = currentSum.value >= 0;
+    currentSum.value += value.value;
+    if ((value.value >= 0) == wasPositive) {
+      _hasSum = ((currentSum.value >= 0) == wasPositive);
+    }
+  } else {
+    _hasSum = false;
+  }
+
+  if (_hasSum) {
+    sum = currentSum.toString();
+  }
+}
+
+TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl(
+    const proto::ColumnStatistics& pb, bool correctStats) {
+  valueCount = pb.numberofvalues();
+  hasNullValue = pb.hasnull();
+  if (!pb.has_timestampstatistics() || !correctStats) {
+    _hasStats = false;
+    minimum = 0;
+    maximum = 0;
+  } else {
+    const proto::TimestampStatistics& stats = pb.timestampstatistics();
+    _hasStats = stats.has_minimum();
+
+    minimum = stats.minimum();
+    maximum = stats.maximum();
+  }
+}
+}  // namespace orc
diff --git a/depends/storage/src/storage/format/orc/orc-proto-definition.h b/depends/storage/src/storage/format/orc/orc-proto-definition.h
new file mode 100644
index 0000000..47b67cd
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/orc-proto-definition.h
@@ -0,0 +1,1131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_PROTO_DEFINITION_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_PROTO_DEFINITION_H_
+
+#include <list>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "univplan/common/statistics.h"
+
+#include "storage/format/orc/input-stream.h"
+#include "storage/format/orc/type-impl.h"
+#include "storage/format/orc/vector.h"
+
+namespace orc {
+
+static const uint64_t ORC_COMPRESSION_BLOCK_SIZE = 256 * 1024;  // 256K
+
+enum WriterId {
+  ORC_JAVA_WRITER = 0,
+  ORC_CPP_WRITER = 1,
+  PRESTO_WRITER = 2,
+  UNKNOWN_WRITER = INT32_MAX
+};
+
+enum CompressionKind {
+  CompressionKind_NONE = 0,
+  CompressionKind_ZLIB = 1,
+  CompressionKind_SNAPPY = 2,
+  CompressionKind_LZO = 3,
+  CompressionKind_LZ4 = 4,
+  CompressionKind_ZSTD = 5,
+  CompressionKind_MAX = INT64_MAX
+};
+
+enum WriterVersion {
+  WriterVersion_ORIGINAL = 0,
+  WriterVersion_HIVE_8732 = 1,
+  WriterVersion_HIVE_4243 = 2,
+  WriterVersion_HIVE_12055 = 3,
+  WriterVersion_HIVE_13083 = 4,
+  WriterVersion_ORC_101 = 5,
+  WriterVersion_ORC_135 = 6,
+  WriterVersion_MAX = INT64_MAX
+};
+
+enum StreamKind {
+  StreamKind_PRESENT = 0,
+  StreamKind_DATA = 1,
+  StreamKind_LENGTH = 2,
+  StreamKind_DICTIONARY_DATA = 3,
+  StreamKind_DICTIONARY_COUNT = 4,
+  StreamKind_SECONDARY = 5,
+  StreamKind_ROW_INDEX = 6,
+  StreamKind_BLOOM_FILTER = 7
+};
+
+class ColumnStatisticsImpl : public univplan::ColumnStatistics {
+ public:
+  ColumnStatisticsImpl() { reset(); }
+  explicit ColumnStatisticsImpl(const proto::ColumnStatistics& stats) {
+    if (stats.has_numberofvalues())
+      valueCount = stats.numberofvalues();
+    else
+      valueCount = 0;
+    if (stats.has_hasnull())
+      hasNullValue = stats.hasnull();
+    else
+      hasNullValue = true;
+  }
+  virtual ~ColumnStatisticsImpl() {}
+
+  virtual void serialize(proto::ColumnStatistics* pb) {
+    assert(pb != nullptr);
+    pb->set_numberofvalues(valueCount);
+    pb->set_hasnull(hasNullValue);
+  }
+};
+
+class IntegerColumnStatisticsImpl : public ColumnStatisticsImpl {
+ private:
+  bool _hasStats;
+  int64_t minimum;
+  int64_t maximum;
+  int64_t sum;
+
+ public:
+  IntegerColumnStatisticsImpl() : ColumnStatisticsImpl() { resetInternal(); }
+  explicit IntegerColumnStatisticsImpl(const proto::ColumnStatistics& stats)
+      : ColumnStatisticsImpl(stats) {
+    if (!stats.has_intstatistics()) {
+      resetInternal();
+    } else {
+      const proto::IntegerStatistics& s = stats.intstatistics();
+      _hasStats = s.has_minimum();
+      if (_hasStats) {
+        minimum = s.minimum();
+        maximum = s.maximum();
+        sum = s.sum();
+      }
+    }
+  }
+  virtual ~IntegerColumnStatisticsImpl() {}
+
+  bool hasMinimum() const { return _hasStats; }
+
+  bool hasMaximum() const { return _hasStats; }
+
+  bool hasSum() const { return _hasStats; }
+
+  int64_t getMinimum() const {
+    if (_hasStats) {
+      return minimum;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Minimum is not defined.");
+    }
+  }
+
+  int64_t getMaximum() const {
+    if (_hasStats) {
+      return maximum;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Maximum is not defined.");
+    }
+  }
+
+  int64_t getSum() const {
+    if (_hasStats) {
+      return sum;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Sum is not defined.");
+    }
+  }
+
+  void updateInteger(int64_t value) {
+    if (!_hasStats) {
+      _hasStats = true;
+      minimum = value;
+      maximum = value;
+    } else if (value < minimum) {
+      minimum = value;
+    } else if (value > maximum) {
+      maximum = value;
+    }
+    sum += value;
+  }
+
+  void merge(const ColumnStatistics& stats) override {
+    ColumnStatisticsImpl::merge(stats);
+    const IntegerColumnStatisticsImpl* other =
+        dynamic_cast<const IntegerColumnStatisticsImpl*>(&stats);
+    assert(other != nullptr);
+    if (other->hasMinimum()) {
+      if (!_hasStats) {
+        _hasStats = other->hasMinimum();
+        minimum = other->getMinimum();
+        maximum = other->getMaximum();
+        sum = other->getSum();
+      } else {
+        if (other->getMinimum() < minimum) {
+          minimum = other->getMinimum();
+        }
+        if (other->getMaximum() > maximum) {
+          maximum = other->getMaximum();
+        }
+        sum += other->getSum();
+      }
+    }
+  }
+
+  void serialize(proto::ColumnStatistics* pb) override {
+    assert(pb != nullptr);
+    ColumnStatisticsImpl::serialize(pb);
+    proto::IntegerStatistics* stats = pb->mutable_intstatistics();
+    if (_hasStats) {
+      stats->set_minimum(minimum);
+      stats->set_maximum(maximum);
+      stats->set_sum(sum);
+    }
+  }
+
+  void reset() override {
+    ColumnStatisticsImpl::reset();
+    resetInternal();
+  }
+
+ private:
+  void resetInternal() {
+    _hasStats = false;
+    minimum = 0;
+    maximum = 0;
+    sum = 0;
+  }
+};
+
+class DoubleColumnStatisticsImpl : public ColumnStatisticsImpl {
+ private:
+  bool _hasStats;
+  double minimum;
+  double maximum;
+  double sum;
+
+ public:
+  DoubleColumnStatisticsImpl() : ColumnStatisticsImpl() { resetInternal(); }
+  explicit DoubleColumnStatisticsImpl(const proto::ColumnStatistics& stats)
+      : ColumnStatisticsImpl(stats) {
+    if (!stats.has_doublestatistics()) {
+      resetInternal();
+    } else {
+      const proto::DoubleStatistics& s = stats.doublestatistics();
+      _hasStats = s.has_minimum();
+      if (_hasStats) {
+        minimum = s.minimum();
+        maximum = s.maximum();
+        sum = s.sum();
+      }
+    }
+  }
+  virtual ~DoubleColumnStatisticsImpl() {}
+
+  bool hasMinimum() const { return _hasStats; }
+
+  bool hasMaximum() const { return _hasStats; }
+
+  bool hasSum() const { return _hasStats; }
+
+  double getMinimum() const {
+    if (_hasStats) {
+      return minimum;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Minimum is not defined.");
+    }
+  }
+
+  double getMaximum() const {
+    if (_hasStats) {
+      return maximum;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Maximum is not defined.");
+    }
+  }
+
+  double getSum() const {
+    if (_hasStats) {
+      return sum;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Sum is not defined.");
+    }
+  }
+
+  void updateDouble(double value) {
+    if (!_hasStats) {
+      _hasStats = true;
+      minimum = value;
+      maximum = value;
+    } else if (value < minimum) {
+      minimum = value;
+    } else if (value > maximum) {
+      maximum = value;
+    }
+    sum += value;
+  }
+
+  void merge(const ColumnStatistics& stats) override {
+    ColumnStatisticsImpl::merge(stats);
+    const DoubleColumnStatisticsImpl* other =
+        dynamic_cast<const DoubleColumnStatisticsImpl*>(&stats);
+    assert(other != nullptr);
+    if (other->hasMinimum()) {
+      if (!_hasStats) {
+        _hasStats = other->hasMinimum();
+        minimum = other->getMinimum();
+        maximum = other->getMaximum();
+        sum = other->getSum();
+      } else {
+        if (other->getMinimum() < minimum) {
+          minimum = other->getMinimum();
+        }
+        if (other->getMaximum() > maximum) {
+          maximum = other->getMaximum();
+        }
+        sum += other->getSum();
+      }
+    }
+  }
+
+  void serialize(proto::ColumnStatistics* pb) override {
+    assert(pb != nullptr);
+    ColumnStatisticsImpl::serialize(pb);
+    proto::DoubleStatistics* stats = pb->mutable_doublestatistics();
+    if (_hasStats) {
+      stats->set_minimum(minimum);
+      stats->set_maximum(maximum);
+      stats->set_sum(sum);
+    }
+  }
+
+  void reset() override {
+    ColumnStatisticsImpl::reset();
+    resetInternal();
+  }
+
+ private:
+  void resetInternal() {
+    _hasStats = false;
+    minimum = 0;
+    maximum = 0;
+    sum = 0;
+  }
+};
+
+class StringColumnStatisticsImpl : public ColumnStatisticsImpl {
+ private:
+  bool _hasStats;
+  std::string minimum;
+  std::string maximum;
+  int64_t totalLength;
+
+ public:
+  StringColumnStatisticsImpl() : ColumnStatisticsImpl() { resetInternal(); }
+  StringColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+                             bool correctStats)
+      : ColumnStatisticsImpl(stats) {
+    if (!stats.has_stringstatistics() || !correctStats) {
+      resetInternal();
+    } else {
+      const proto::StringStatistics& s = stats.stringstatistics();
+      _hasStats = s.has_minimum();
+      if (_hasStats) {
+        minimum = s.minimum();
+        maximum = s.maximum();
+        totalLength = s.sum();
+      }
+    }
+  }
+  virtual ~StringColumnStatisticsImpl() {}
+
+  bool hasMinimum() const { return _hasStats; }
+
+  bool hasMaximum() const { return _hasStats; }
+
+  bool hasTotalLength() const { return _hasStats; }
+
+  const char* getMinimum() const {
+    if (_hasStats) {
+      return minimum.c_str();
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Minimum is not defined.");
+    }
+  }
+
+  const char* getMaximum() const {
+    if (_hasStats) {
+      return maximum.c_str();
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Maximum is not defined.");
+    }
+  }
+
+  int64_t getTotalLength() const {
+    if (_hasStats) {
+      return totalLength;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Total length is not defined.");
+    }
+  }
+
+  void updateString(const char* buffer, uint64_t len) {
+    std::string text(buffer, len);
+    if (!_hasStats) {
+      _hasStats = true;
+      maximum = minimum = text;
+    } else if (minimum > text) {
+      minimum = text;
+    } else if (maximum < text) {
+      maximum = text;
+    }
+    totalLength += len;
+  }
+
+  void merge(const ColumnStatistics& stats) override {
+    ColumnStatisticsImpl::merge(stats);
+    const StringColumnStatisticsImpl* other =
+        dynamic_cast<const StringColumnStatisticsImpl*>(&stats);
+    assert(other != nullptr);
+    if (other->hasMinimum()) {
+      if (!_hasStats) {
+        _hasStats = other->hasMinimum();
+        minimum = other->getMinimum();
+        maximum = other->getMaximum();
+        totalLength = other->getTotalLength();
+      } else {
+        if (other->getMinimum() < minimum) {
+          minimum = other->getMinimum();
+        }
+        if (other->getMaximum() > maximum) {
+          maximum = other->getMaximum();
+        }
+        totalLength += other->getTotalLength();
+      }
+    }
+  }
+
+  void serialize(proto::ColumnStatistics* pb) override {
+    assert(pb != nullptr);
+    ColumnStatisticsImpl::serialize(pb);
+    proto::StringStatistics* stats = pb->mutable_stringstatistics();
+    if (_hasStats) {
+      stats->set_minimum(minimum);
+      stats->set_maximum(maximum);
+      stats->set_sum(totalLength);
+    }
+  }
+
+  void reset() override {
+    ColumnStatisticsImpl::reset();
+    resetInternal();
+  }
+
+ private:
+  void resetInternal() {
+    _hasStats = false;
+    minimum.clear();
+    maximum.clear();
+    totalLength = 0;
+  }
+};
+
+class StreamInformation {
+ public:
+  virtual ~StreamInformation();
+
+  virtual StreamKind getKind() const = 0;
+  virtual uint64_t getColumnId() const = 0;
+  virtual uint64_t getOffset() const = 0;
+  virtual uint64_t getLength() const = 0;
+};
+
+class StripeInformation {
+ public:
+  virtual ~StripeInformation();
+
+  // Get the byte offset of the start of the stripe.
+  // @return the bytes from the start of the file
+  virtual uint64_t getOffset() const = 0;
+
+  // Get the total length of the stripe in bytes.
+  // @return the number of bytes in the stripe
+  virtual uint64_t getLength() const = 0;
+
+  // Get the length of the stripe's indexes.
+  // @return the number of bytes in the index
+  virtual uint64_t getIndexLength() const = 0;
+
+  // Get the length of the stripe's data section.
+  // @return the number of bytes of data in the stripe
+  virtual uint64_t getDataLength() const = 0;
+
+  // Get the length of the stripe's footer section, which describes the
+  // stripe's streams and column encodings.
+  // @return the number of bytes in the footer
+  virtual uint64_t getFooterLength() const = 0;
+
+  // Get the number of rows in the stripe.
+  // @return a count of the number of rows
+  virtual uint64_t getNumberOfRows() const = 0;
+
+  // Get the number of streams in the stripe.
+  virtual uint64_t getNumberOfStreams() const = 0;
+
+  // Get the StreamInformation for the given stream.
+  virtual std::unique_ptr<StreamInformation> getStreamInformation(
+      uint64_t streamId) const = 0;
+
+  // Get the dictionary size.
+  // @param colId the columnId
+  // @return the size of the dictionary or 0 if there isn't one
+  virtual uint64_t getDictionarySize(uint64_t colId) const = 0;
+
+  // Get the writer timezone.
+  virtual const std::string& getWriterTimezone() const = 0;
+};
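+
+// Reading note (illustrative): a stripe is laid out as its index streams,
+// data streams, and a stripe footer, so getLength() equals getIndexLength()
+// + getDataLength() + getFooterLength(). A hypothetical caller could walk the
+// streams like this (stripeInfo stands for any StripeInformation*):
+//
+//   for (uint64_t i = 0; i < stripeInfo->getNumberOfStreams(); ++i) {
+//     std::unique_ptr<StreamInformation> si =
+//         stripeInfo->getStreamInformation(i);
+//     // si->getKind(), si->getColumnId(), si->getOffset(), si->getLength()
+//   }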
+
+class BinaryColumnStatisticsImpl : public ColumnStatisticsImpl {
+ public:
+  BinaryColumnStatisticsImpl() { resetInternal(); }
+  BinaryColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+                             bool correctStats)
+      : ColumnStatisticsImpl(stats) {
+    if (!stats.has_binarystatistics() || !correctStats) {
+      resetInternal();
+    } else {
+      const proto::BinaryStatistics& s = stats.binarystatistics();
+      _hasStats = s.has_sum();
+      if (_hasStats) {
+        totalLength = s.sum();
+      }
+    }
+  }
+  virtual ~BinaryColumnStatisticsImpl() {}
+
+  bool hasTotalLength() const { return _hasStats; }
+
+  uint64_t getTotalLength() const {
+    if (_hasStats) {
+      return totalLength;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Total length is not defined.");
+    }
+  }
+
+  void update(size_t length) {
+    _hasStats = true;
+    totalLength += length;
+  }
+
+  void merge(const ColumnStatistics& stats) override {
+    ColumnStatisticsImpl::merge(stats);
+    const BinaryColumnStatisticsImpl* other =
+        dynamic_cast<const BinaryColumnStatisticsImpl*>(&stats);
+    assert(other != nullptr);
+    if (other->hasTotalLength()) {
+      if (!_hasStats) {
+        _hasStats = other->hasTotalLength();
+        totalLength = other->getTotalLength();
+      } else {
+        totalLength += other->getTotalLength();
+      }
+    }
+  }
+
+  void serialize(proto::ColumnStatistics* pb) override {
+    assert(pb != nullptr);
+    ColumnStatisticsImpl::serialize(pb);
+
+    proto::BinaryStatistics* binStats = pb->mutable_binarystatistics();
+    if (_hasStats) {
+      binStats->set_sum(totalLength);
+    }
+  }
+
+ private:
+  void resetInternal() {
+    ColumnStatisticsImpl::reset();
+    _hasStats = false;
+    totalLength = 0;
+  }
+
+  bool _hasStats;
+  int64_t totalLength;
+};
+
+class BooleanColumnStatisticsImpl : public ColumnStatisticsImpl {
+ private:
+  bool _hasCount;
+  uint64_t trueCount;
+
+ public:
+  BooleanColumnStatisticsImpl() : ColumnStatisticsImpl() { resetInternal(); }
+  BooleanColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+                              bool correctStats)
+      : ColumnStatisticsImpl(stats) {
+    if (!stats.has_bucketstatistics() || !correctStats ||
+        stats.bucketstatistics().count_size() == 0) {
+      resetInternal();
+    } else {
+      _hasCount = true;
+      trueCount = stats.bucketstatistics().count(0);
+    }
+  }
+  virtual ~BooleanColumnStatisticsImpl() {}
+
+  bool hasCount() const { return _hasCount; }
+
+  uint64_t getFalseCount() const {
+    if (_hasCount) {
+      return valueCount - trueCount;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "False count is not defined.");
+    }
+  }
+
+  uint64_t getTrueCount() const {
+    if (_hasCount) {
+      return trueCount;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "True count is not defined.");
+    }
+  }
+
+  void updateBoolean(bool value) {
+    if (!_hasCount) _hasCount = true;
+    if (value) trueCount += 1;
+  }
+
+  void merge(const ColumnStatistics& stats) override {
+    ColumnStatisticsImpl::merge(stats);
+    const BooleanColumnStatisticsImpl* other =
+        dynamic_cast<const BooleanColumnStatisticsImpl*>(&stats);
+    assert(other != nullptr);
+    if (other->hasCount()) {
+      if (!_hasCount) {
+        _hasCount = true;
+        trueCount = other->trueCount;
+      } else {
+        trueCount += other->trueCount;
+      }
+    }
+  }
+
+  void serialize(proto::ColumnStatistics* pb) override {
+    assert(pb != nullptr);
+    ColumnStatisticsImpl::serialize(pb);
+    proto::BucketStatistics* stats = pb->mutable_bucketstatistics();
+    if (_hasCount) stats->add_count(trueCount);
+  }
+
+  void reset() override {
+    ColumnStatisticsImpl::reset();
+    resetInternal();
+  }
+
+ private:
+  void resetInternal() {
+    _hasCount = false;
+    trueCount = 0;
+  }
+};
+
+class DateColumnStatisticsImpl : public ColumnStatisticsImpl {
+ private:
+  bool _hasStats;
+  int32_t minimum;
+  int32_t maximum;
+
+ public:
+  DateColumnStatisticsImpl() : ColumnStatisticsImpl() { resetInternal(); }
+  DateColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+                           bool correctStats);
+  virtual ~DateColumnStatisticsImpl();
+
+  bool hasMinimum() const { return _hasStats; }
+
+  bool hasMaximum() const { return _hasStats; }
+
+  int32_t getMinimum() const {
+    if (_hasStats) {
+      return minimum;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Minimum is not defined.");
+    }
+  }
+
+  int32_t getMaximum() const {
+    if (_hasStats) {
+      return maximum;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Maximum is not defined.");
+    }
+  }
+
+  void updateDate(int64_t value) {
+    if (!_hasStats) {
+      _hasStats = true;
+      minimum = value;
+      maximum = value;
+    } else if (value < minimum) {
+      minimum = value;
+    } else if (value > maximum) {
+      maximum = value;
+    }
+  }
+
+  void merge(const ColumnStatistics& stats) override {
+    ColumnStatisticsImpl::merge(stats);
+    const DateColumnStatisticsImpl* other =
+        dynamic_cast<const DateColumnStatisticsImpl*>(&stats);
+    assert(other != nullptr);
+    if (other->hasMinimum()) {
+      if (!_hasStats) {
+        _hasStats = other->hasMinimum();
+        minimum = other->getMinimum();
+        maximum = other->getMaximum();
+      } else {
+        if (other->getMinimum() < minimum) {
+          minimum = other->getMinimum();
+        }
+        if (other->getMaximum() > maximum) {
+          maximum = other->getMaximum();
+        }
+      }
+    }
+  }
+
+  void serialize(proto::ColumnStatistics* pb) override {
+    assert(pb != nullptr);
+    ColumnStatisticsImpl::serialize(pb);
+    proto::DateStatistics* stats = pb->mutable_datestatistics();
+    if (_hasStats) {
+      stats->set_minimum(minimum);
+      stats->set_maximum(maximum);
+    }
+  }
+
+  void reset() override {
+    ColumnStatisticsImpl::reset();
+    resetInternal();
+  }
+
+ private:
+  void resetInternal() {
+    _hasStats = false;
+    minimum = 0;
+    maximum = 0;
+  }
+};
+
+class DecimalColumnStatisticsImpl : public ColumnStatisticsImpl {
+ private:
+  bool _hasMinimum;
+  bool _hasMaximum;
+  bool _hasSum;
+  std::string minimum;
+  std::string maximum;
+  std::string sum;
+
+ public:
+  DecimalColumnStatisticsImpl() : ColumnStatisticsImpl() { resetInternal(); }
+  DecimalColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+                              bool correctStats);
+  virtual ~DecimalColumnStatisticsImpl();
+
+  bool hasMinimum() const { return _hasMinimum; }
+
+  bool hasMaximum() const { return _hasMaximum; }
+
+  bool hasSum() const { return _hasSum; }
+
+  Decimal getMinimum() const {
+    if (_hasMinimum) {
+      return Decimal(minimum);
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Minimum is not defined.");
+    }
+  }
+
+  Decimal getMaximum() const {
+    if (_hasMaximum) {
+      return Decimal(maximum);
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Maximum is not defined.");
+    }
+  }
+
+  Decimal getSum() const {
+    if (_hasSum) {
+      return Decimal(sum);
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Sum is not defined.");
+    }
+  }
+
+  const char* getMinimumStr() const {
+    if (_hasMinimum) {
+      return minimum.c_str();
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Minimum is not defined.");
+    }
+  }
+
+  const char* getMaximumStr() const {
+    if (_hasMaximum) {
+      return maximum.c_str();
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Maximum is not defined.");
+    }
+  }
+
+  const char* getSumStr() const {
+    if (_hasSum) {
+      return sum.c_str();
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Sum is not defined.");
+    }
+  }
+
+  void updateDecimal(const orc::Decimal& value) {
+    if (!_hasMinimum) {
+      _hasMinimum = true;
+      _hasMaximum = true;
+      minimum = value.toString();
+      maximum = value.toString();
+    } else if (value < this->getMinimum()) {
+      minimum = value.toString();
+    } else if (value > this->getMaximum()) {
+      maximum = value.toString();
+    }
+    if (_hasSum) {
+      updateSum(value);
+    } else {
+      _hasSum = true;
+      sum = value.toString();
+    }
+  }
+
+  void merge(const ColumnStatistics& stats) override {
+    ColumnStatisticsImpl::merge(stats);
+    const DecimalColumnStatisticsImpl* other =
+        dynamic_cast<const DecimalColumnStatisticsImpl*>(&stats);
+    assert(other != nullptr);
+    if (other->hasMinimum()) {
+      if (!_hasMinimum) {
+        _hasMinimum = true;
+        _hasMaximum = true;
+        minimum = other->getMinimum().toString();
+        maximum = other->getMaximum().toString();
+      } else {
+        if (other->getMinimum() < this->getMinimum()) {
+          minimum = other->getMinimum().toString();
+        }
+        if (other->getMaximum() > this->getMaximum()) {
+          maximum = other->getMaximum().toString();
+        }
+      }
+    }
+    if (other->hasSum()) {
+      if (_hasSum) {
+        Decimal otherSum = other->getSum();
+        updateSum(otherSum);
+      } else {
+        _hasSum = true;
+        sum = other->getSum().toString();
+      }
+    }
+  }
+
+  void serialize(proto::ColumnStatistics* pb) override {
+    assert(pb != nullptr);
+    ColumnStatisticsImpl::serialize(pb);
+    proto::DecimalStatistics* stats = pb->mutable_decimalstatistics();
+    if (_hasMinimum) {
+      stats->set_minimum(minimum);
+      stats->set_maximum(maximum);
+    }
+    if (_hasSum) stats->set_sum(sum);
+  }
+
+  void reset() override {
+    ColumnStatisticsImpl::reset();
+    resetInternal();
+  }
+
+ private:
+  void updateSum(Decimal value);
+  void resetInternal() {
+    _hasMaximum = false;
+    _hasMinimum = false;
+    _hasSum = false;
+    minimum.clear();
+    maximum.clear();
+    sum.clear();
+  }
+};
+
+class TimestampColumnStatisticsImpl : public ColumnStatisticsImpl {
+ private:
+  bool _hasStats;
+  int64_t minimum;
+  int64_t maximum;
+
+ public:
+  TimestampColumnStatisticsImpl() : ColumnStatisticsImpl() { resetInternal(); }
+  TimestampColumnStatisticsImpl(const proto::ColumnStatistics& stats,
+                                bool correctStats);
+
+  virtual ~TimestampColumnStatisticsImpl();
+
+  bool hasMinimum() const { return _hasStats; }
+
+  bool hasMaximum() const { return _hasStats; }
+
+  int64_t getMinimum() const {
+    if (_hasStats) {
+      return minimum;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Minimum is not defined.");
+    }
+  }
+
+  int64_t getMaximum() const {
+    if (_hasStats) {
+      return maximum;
+    } else {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Maximum is not defined.");
+    }
+  }
+
+  void updateTimestamp(const int64_t val) {
+    if (!_hasStats) {
+      _hasStats = true;
+      maximum = minimum = val;
+    } else if (minimum > val) {
+      minimum = val;
+    } else if (maximum < val) {
+      maximum = val;
+    }
+  }
+
+  void merge(const ColumnStatistics& stats) override {
+    ColumnStatisticsImpl::merge(stats);
+    const TimestampColumnStatisticsImpl* other =
+        dynamic_cast<const TimestampColumnStatisticsImpl*>(&stats);
+    assert(other != nullptr);
+    if (other->hasMinimum()) {
+      if (!_hasStats) {
+        _hasStats = other->hasMinimum();
+        minimum = other->getMinimum();
+        maximum = other->getMaximum();
+      } else {
+        if (other->getMinimum() < minimum) {
+          minimum = other->getMinimum();
+        }
+        if (other->getMaximum() > maximum) {
+          maximum = other->getMaximum();
+        }
+      }
+    }
+  }
+
+  void serialize(proto::ColumnStatistics* pb) override {
+    assert(pb != nullptr);
+    ColumnStatisticsImpl::serialize(pb);
+    proto::TimestampStatistics* stats = pb->mutable_timestampstatistics();
+    if (_hasStats) {
+      stats->set_minimum(minimum);
+      stats->set_maximum(maximum);
+    }
+  }
+
+  void reset() override {
+    ColumnStatisticsImpl::reset();
+    resetInternal();
+  }
+
+ private:
+  void resetInternal() {
+    _hasStats = false;
+    minimum = 0;
+    maximum = 0;
+  }
+
+  int8_t compare(dbcommon::Timestamp ts1, dbcommon::Timestamp ts2) {
+    const int64_t val1 = ts1.second;
+    const int64_t nano1 = ts1.nanosecond;
+    const int64_t val2 = ts2.second;
+    const int64_t nano2 = ts2.nanosecond;
+    if (val1 == val2) {
+      if (nano1 > nano2)
+        return 1;
+      else if (nano1 == nano2)
+        return 0;
+      else
+        return -1;
+    } else if (val1 > val2) {
+      return 1;
+    } else {
+      return -1;
+    }
+  }
+};
+
+class StreamInformationImpl : public StreamInformation {
+ private:
+  StreamKind kind;
+  uint64_t column;
+  uint64_t offset;
+  uint64_t length;
+
+ public:
+  StreamInformationImpl(uint64_t _offset, const proto::Stream& stream)
+      : kind(static_cast<StreamKind>(stream.kind())),
+        column(stream.column()),
+        offset(_offset),
+        length(stream.length()) {
+    // PASS
+  }
+
+  ~StreamInformationImpl() {}
+
+  StreamKind getKind() const override { return kind; }
+
+  uint64_t getColumnId() const override { return column; }
+
+  uint64_t getOffset() const override { return offset; }
+
+  uint64_t getLength() const override { return length; }
+};
+
+class StripeInformationImpl : public StripeInformation {
+  uint64_t offset;
+  uint64_t indexLength;
+  uint64_t dataLength;
+  uint64_t footerLength;
+  uint64_t numRows;
+  InputStream* stream;
+  dbcommon::MemoryPool& memoryPool;
+  CompressionKind compression;
+  uint64_t blockSize;
+  mutable std::unique_ptr<proto::StripeFooter> stripeFooter;
+  void ensureStripeFooterLoaded() const;
+
+ public:
+  StripeInformationImpl(uint64_t _offset, uint64_t _indexLength,
+                        uint64_t _dataLength, uint64_t _footerLength,
+                        uint64_t _numRows, InputStream* _stream,
+                        dbcommon::MemoryPool& pool,  // NOLINT
+                        CompressionKind _compression, uint64_t _blockSize)
+      : offset(_offset),
+        indexLength(_indexLength),
+        dataLength(_dataLength),
+        footerLength(_footerLength),
+        numRows(_numRows),
+        stream(_stream),
+        memoryPool(pool),
+        compression(_compression),
+        blockSize(_blockSize) {
+    // PASS
+  }
+
+  virtual ~StripeInformationImpl() {
+    // PASS
+  }
+
+  uint64_t getOffset() const override { return offset; }
+
+  uint64_t getLength() const override {
+    return indexLength + dataLength + footerLength;
+  }
+  uint64_t getIndexLength() const override { return indexLength; }
+
+  uint64_t getDataLength() const override { return dataLength; }
+
+  uint64_t getFooterLength() const override { return footerLength; }
+
+  uint64_t getNumberOfRows() const override { return numRows; }
+
+  uint64_t getNumberOfStreams() const override {
+    ensureStripeFooterLoaded();
+    return static_cast<uint64_t>(stripeFooter->streams_size());
+  }
+
+  std::unique_ptr<StreamInformation> getStreamInformation(
+      uint64_t streamId) const override;
+
+  uint64_t getDictionarySize(uint64_t colId) const override {
+    ensureStripeFooterLoaded();
+    return static_cast<uint64_t>(
+        stripeFooter->columns(static_cast<int>(colId)).dictionarysize());
+  }
+
+  const std::string& getWriterTimezone() const override {
+    ensureStripeFooterLoaded();
+    return stripeFooter->writertimezone();
+  }
+};
+
+univplan::ColumnStatistics* convertColumnStatistics(
+    const proto::ColumnStatistics& s, bool correctStats);
+std::unique_ptr<ColumnStatisticsImpl> createColumnStatistics(
+    const orc::Type* type);
+
+class StatisticsImpl : public univplan::Statistics {
+ private:
+  std::list<univplan::ColumnStatistics*> colStats;
+
+  // DELIBERATELY NOT IMPLEMENTED
+  StatisticsImpl(const StatisticsImpl&);
+  StatisticsImpl& operator=(const StatisticsImpl&);
+
+ public:
+  StatisticsImpl(const proto::StripeStatistics& stripeStats,
+                 bool correctStats) {
+    for (int i = 0; i < stripeStats.colstats_size(); i++) {
+      colStats.push_back(
+          convertColumnStatistics(stripeStats.colstats(i), correctStats));
+    }
+  }
+
+  StatisticsImpl(const proto::Footer& footer, bool correctStats) {
+    for (int i = 0; i < footer.statistics_size(); i++) {
+      colStats.push_back(
+          convertColumnStatistics(footer.statistics(i), correctStats));
+    }
+  }
+
+  const univplan::ColumnStatistics* getColumnStatistics(
+      uint32_t columnId) const override {
+    std::list<univplan::ColumnStatistics*>::const_iterator it =
+        colStats.begin();
+    std::advance(it, static_cast<int64_t>(columnId));
+    return *it;
+  }
+
+  virtual ~StatisticsImpl();
+
+  uint32_t getNumberOfColumns() const override {
+    return static_cast<uint32_t>(colStats.size());
+  }
+};
+
+}  // namespace orc
+
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_ORC_PROTO_DEFINITION_H_
diff --git a/depends/storage/src/storage/format/orc/orc_proto.proto b/depends/storage/src/storage/format/orc/orc_proto.proto
new file mode 100644
index 0000000..0c8027f
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/orc_proto.proto
@@ -0,0 +1,277 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto2";
+
+package orc.proto;
+
+option java_package = "org.apache.orc";
+
+message IntegerStatistics {
+  optional sint64 minimum = 1;
+  optional sint64 maximum = 2;
+  optional sint64 sum = 3;
+}
+
+message DoubleStatistics {
+  optional double minimum = 1;
+  optional double maximum = 2;
+  optional double sum = 3;
+}
+
+message StringStatistics {
+  optional string minimum = 1;
+  optional string maximum = 2;
+  // sum will store the total length of all strings in a stripe
+  optional sint64 sum = 3;
+}
+
+message BucketStatistics {
+  repeated uint64 count = 1 [packed = true];
+}
+
+message DecimalStatistics {
+  optional string minimum = 1;
+  optional string maximum = 2;
+  optional string sum = 3;
+}
+
+message DateStatistics {
+  // min, max values saved as days since epoch
+  optional sint32 minimum = 1;
+  optional sint32 maximum = 2;
+}
+
+message TimestampStatistics {
+  // min, max values saved as milliseconds since epoch
+  optional sint64 minimum = 1;
+  optional sint64 maximum = 2;
+  optional sint64 minimumUtc = 3;
+  optional sint64 maximumUtc = 4;
+}
+
+message BinaryStatistics {
+  // sum will store the total binary blob length in a stripe
+  optional sint64 sum = 1;
+}
+
+message ColumnStatistics {
+  optional uint64 numberOfValues = 1;
+  optional IntegerStatistics intStatistics = 2;
+  optional DoubleStatistics doubleStatistics = 3;
+  optional StringStatistics stringStatistics = 4;
+  optional BucketStatistics bucketStatistics = 5;
+  optional DecimalStatistics decimalStatistics = 6;
+  optional DateStatistics dateStatistics = 7;
+  optional BinaryStatistics binaryStatistics = 8;
+  optional TimestampStatistics timestampStatistics = 9;
+  optional bool hasNull = 10;
+}
+
+message RowIndexEntry {
+  repeated uint64 positions = 1 [packed = true];
+  optional ColumnStatistics statistics = 2;
+}
+
+message RowIndex {
+  repeated RowIndexEntry entry = 1;
+}
+
+message BloomFilter {
+  optional uint32 numHashFunctions = 1;
+  repeated fixed64 bitset = 2;
+  optional bytes utf8bitset = 3;
+}
+
+message BloomFilterIndex {
+  repeated BloomFilter bloomFilter = 1;
+}
+
+message Stream {
+  // if you add new index stream kinds, you need to make sure to update
+  // StreamName to ensure it is added to the stripe in the right area
+  enum Kind {
+    PRESENT = 0;
+    DATA = 1;
+    LENGTH = 2;
+    DICTIONARY_DATA = 3;
+    DICTIONARY_COUNT = 4;
+    SECONDARY = 5;
+    ROW_INDEX = 6;
+    BLOOM_FILTER = 7;
+    BLOOM_FILTER_UTF8 = 8;
+  }
+  optional Kind kind = 1;
+  optional uint32 column = 2;
+  optional uint64 length = 3;
+}
+
+message ColumnEncoding {
+  enum Kind {
+    DIRECT = 0;
+    DICTIONARY = 1;
+    DIRECT_V2 = 2;
+    DICTIONARY_V2 = 3;
+    DIRECT_V0 = 4;
+    DICTIONARY_V0 = 5;
+  }
+  optional Kind kind = 1;
+  optional uint32 dictionarySize = 2;
+
+  // The encoding of the bloom filters for this column:
+  //   0 or missing = none or original
+  //   1            = ORC-135 (utc for timestamps)
+  optional uint32 bloomEncoding = 3;
+}
+
+message StripeFooter {
+  repeated Stream streams = 1;
+  repeated ColumnEncoding columns = 2;
+  optional string writerTimezone = 3;
+}
+
+message Type {
+  enum Kind {
+    BOOLEAN = 0;
+    BYTE = 1;
+    SHORT = 2;
+    INT = 3;
+    LONG = 4;
+    FLOAT = 5;
+    DOUBLE = 6;
+    STRING = 7;
+    BINARY = 8;
+    TIMESTAMP = 9;
+    LIST = 10;
+    MAP = 11;
+    STRUCT = 12;
+    UNION = 13;
+    DECIMAL = 14;
+    DATE = 15;
+    VARCHAR = 16;
+    CHAR = 17;
+    TIME = 18;
+  }
+  optional Kind kind = 1;
+  repeated uint32 subtypes = 2 [packed = true];
+  repeated string fieldNames = 3;
+  optional uint32 maximumLength = 4;
+  optional uint32 precision = 5;
+  optional uint32 scale = 6;
+}
+
+message StripeInformation {
+  optional uint64 offset = 1;
+  optional uint64 indexLength = 2;
+  optional uint64 dataLength = 3;
+  optional uint64 footerLength = 4;
+  optional uint64 numberOfRows = 5;
+}
+
+message UserMetadataItem {
+  optional string name = 1;
+  optional bytes value = 2;
+}
+
+message StripeStatistics {
+  repeated ColumnStatistics colStats = 1;
+}
+
+message Metadata {
+  repeated StripeStatistics stripeStats = 1;
+}
+
+message Footer {
+  optional uint64 headerLength = 1;
+  optional uint64 contentLength = 2;
+  repeated StripeInformation stripes = 3;
+  repeated Type types = 4;
+  repeated UserMetadataItem metadata = 5;
+  optional uint64 numberOfRows = 6;
+  repeated ColumnStatistics statistics = 7;
+  optional uint32 rowIndexStride = 8;
+
+  // Each implementation that writes ORC files should register for a code
+  // 0 = ORC Java
+  // 1 = ORC C++
+  // 2 = Presto
+  // 3 = Scritchley Go from https://github.com/scritchley/orc
+  optional uint32 writer = 9;
+}
+
+enum CompressionKind {
+  NONE = 0;
+  ZLIB = 1;
+  SNAPPY = 2;
+  LZO = 3;
+  LZ4 = 4;
+  ZSTD = 5;
+}
+
+// Serialized length must be less than 255 bytes
+message PostScript {
+  optional uint64 footerLength = 1;
+  optional CompressionKind compression = 2;
+  optional uint64 compressionBlockSize = 3;
+  // the version of the file format
+  //   [0, 11] = Hive 0.11
+  //   [0, 12] = Hive 0.12
+  repeated uint32 version = 4 [packed = true];
+  optional uint64 metadataLength = 5;
+
+  // The version of the writer that wrote the file. This number is
+  // updated when we make fixes or large changes to the writer so that
+  // readers can detect whether a given bug is present in the data.
+  //
+  // Only the Java ORC writer may use values under 6 (or missing) so that
+  // readers that predate ORC-202 treat the new writers correctly. Each
+  // writer should assign their own sequence of versions starting from 6.
+  //
+  // Version of the ORC Java writer:
+  //   0 = original
+  //   1 = HIVE-8732 fixed (fixed stripe/file maximum statistics &
+  //                        string statistics use utf8 for min/max)
+  //   2 = HIVE-4243 fixed (use real column names from Hive tables)
+  //   3 = HIVE-12055 fixed (vectorized writer implementation)
+  //   4 = HIVE-13083 fixed (decimals write present stream correctly)
+  //   5 = ORC-101 fixed (bloom filters use utf8 consistently)
+  //   6 = ORC-135 fixed (timestamp statistics use utc)
+  //
+  // Version of the ORC C++ writer:
+  //   6 = original
+  //
+  // Version of the Presto writer:
+  //   6 = original
+  //
+  // Version of the Scritchley Go writer:
+  //   6 = original
+  //
+  optional uint32 writerVersion = 6;
+
+  // Leave this last in the record
+  optional string magic = 8000;
+}
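+
+// Reading note (simplified sketch, not part of the wire format): the
+// PostScript is the entry point into the file tail. A reader typically reads
+// the last byte of the file to get the PostScript length, parses the
+// PostScript from the bytes immediately before it, and then uses
+// footerLength and metadataLength to locate the Footer and Metadata sections
+// that precede the PostScript.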
+
+// The contents of the file tail that must be serialized.
+// This gets serialized as part of OrcSplit, also used by footer cache.
+message FileTail {
+  optional PostScript postscript = 1;
+  optional Footer footer = 2;
+  optional uint64 fileLength = 3;
+  optional uint64 postscriptLength = 4;
+}
diff --git a/depends/storage/src/storage/format/orc/output-stream.cc b/depends/storage/src/storage/format/orc/output-stream.cc
new file mode 100644
index 0000000..29026f9
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/output-stream.cc
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <string>
+
+#include "storage/format/orc/output-stream.h"
+
+namespace orc {
+
+std::unique_ptr<OutputStream> writeFile(dbcommon::FileSystem *fs,
+                                        const std::string &path) {
+  std::unique_ptr<OutputStream> os(new GeneralFileOutputStream(fs, path));
+
+  return std::move(os);
+}
+
+}  // end of namespace orc
diff --git a/depends/storage/src/storage/format/orc/output-stream.h b/depends/storage/src/storage/format/orc/output-stream.h
new file mode 100644
index 0000000..5abc687
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/output-stream.h
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef STORAGE_SRC_STORAGE_FORMAT_ORC_OUTPUT_STREAM_H_
+#define STORAGE_SRC_STORAGE_FORMAT_ORC_OUTPUT_STREAM_H_
+
+#include <cassert>
+#include <string>
+
+#include "dbcommon/filesystem/file-system.h"
+
+namespace orc {
+
+class OutputStream {
+ public:
+  OutputStream() {}
+  virtual ~OutputStream() {}
+
+  // Write length bytes from the buffer to the file
+  // @param buf The output buffer
+  // @param length The number of bytes in the buffer to write
+  // @return Void
+  virtual void write(void* buf, uint64_t length) = 0;
+
+  // Get the name of the stream for error messages.
+  // @return The stream name
+  virtual const std::string& getName() const = 0;
+
+  // Get the natural size for writes.
+  // @return the number of bytes that should be written at once
+  virtual uint64_t getNaturalWriteSize() const = 0;
+
+  // Get the total length of the file in bytes.
+  // @return The length
+  virtual uint64_t getLength() const = 0;
+
+  // Get current file position
+  // @return Current file position
+  virtual uint64_t getPosition() const = 0;
+
+  // Pad the file with the given number of bytes
+  // @param size The number of bytes to pad
+  // @return Void
+  virtual void padding(uint64_t size) = 0;
+
+  // Close the stream
+  // @return Void
+  virtual void close() = 0;
+};
+
+class GeneralFileOutputStream : public OutputStream {
+ public:
+  GeneralFileOutputStream(dbcommon::FileSystem* fs, std::string fileName)
+      : fs(fs), fileName(fileName) {
+    file = NULL;
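+    // Note: -1 wraps around on the unsigned totalLength; write() replaces it
+    // with the real file length once the file has been opened.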
+    totalLength = -1;
+  }
+
+  virtual ~GeneralFileOutputStream() {}
+
+  uint64_t getLength() const override { return totalLength; }
+
+  uint64_t getNaturalWriteSize() const override { return 128 * 1024; }
+
+  void write(void* buf, uint64_t length) override {
+    assert(buf != nullptr);
+
+    if (!file) {
+      file = fs->open(fileName.c_str(), O_WRONLY);
+      totalLength = fs->getFileLength(fileName.c_str());
+    }
+    fs->write(file.get(), buf, length);
+  }
+
+  const std::string& getName() const override { return fileName; }
+
+  uint64_t getPosition() const override { return fs->tell(file.get()); }
+
+  void padding(uint64_t size) override {
+    static char buffer[1024] = {0};
+
+    if (size > 0) {
+      // The first byte of the padding area is used as the FASTBlock type,
+      // so the whole padding area is zero-filled.
+      int times = size / sizeof(buffer);
+      int left = size % sizeof(buffer);
+
+      for (int i = 0; i < times; i++)
+        fs->write(file.get(), buffer, sizeof(buffer));
+
+      if (left > 0) fs->write(file.get(), buffer, left);
+    }
+  }
+
+  void close() override {
+    if (file) {
+      file->close();
+    }
+  }
+
+  bool fileopen() { return file != nullptr; }
+
+ private:
+  std::string fileName;
+  std::unique_ptr<dbcommon::File> file;
+  uint64_t totalLength = 0;
+  dbcommon::FileSystem* fs = nullptr;
+};
+
+std::unique_ptr<OutputStream> writeFile(dbcommon::FileSystem* fs,
+                                        const std::string& path);
+
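+// Illustrative usage sketch (hypothetical caller; fs and the path are
+// assumptions, not taken from this patch):
+//
+//   std::unique_ptr<OutputStream> out = writeFile(fs, "/tmp/example.orc");
+//   std::string buf = "hello";
+//   out->write(const_cast<char*>(buf.data()), buf.size());
+//   out->padding(16);  // zero-fill 16 bytes
+//   out->close();
+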
+}  //  end of namespace orc
+#endif  // STORAGE_SRC_STORAGE_FORMAT_ORC_OUTPUT_STREAM_H_
diff --git a/depends/storage/src/storage/format/orc/reader.cc b/depends/storage/src/storage/format/orc/reader.cc
new file mode 100644
index 0000000..e88e62a
--- /dev/null
+++ b/depends/storage/src/storage/format/orc/reader.cc
@@ -0,0 +1,2424 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <google/protobuf/io/coded_stream.h>
+
+#include <math.h>
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <list>
+#include <map>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "storage/format/orc/byte-rle.h"
+#include "storage/format/orc/exceptions.h"
+#include "storage/format/orc/input-stream.h"
+#include "storage/format/orc/int128.h"
+#include "storage/format/orc/orc-predicates.h"
+#include "storage/format/orc/reader.h"
+#include "storage/format/orc/rle.h"
+#include "storage/format/orc/type-impl.h"
+
+namespace orc {
+
+enum ColumnSelection {
+  ColumnSelection_NONE = 0,
+  ColumnSelection_NAMES = 1,
+  ColumnSelection_FIELD_IDS = 2,
+  ColumnSelection_TYPE_IDS = 3
+};
+
+struct ReaderOptionsPrivate {
+  ColumnSelection selection;
+  std::list<uint64_t> includedColumnIndexes;
+  std::list<std::string> includedColumnNames;
+  uint64_t dataStart;
+  uint64_t dataLength;
+  uint64_t tailLocation;
+  bool throwOnHive11DecimalOverflow;
+  int32_t forcedScaleOnHive11Decimal;
+  std::ostream* errorStream;
+  dbcommon::MemoryPool* memoryPool;
+  std::string serializedTail;
+  const univplan::UnivPlanExprPolyList* predicateExprs;
+  const dbcommon::TupleDesc* td;
+  bool readStatisticsOnly;
+
+  ReaderOptionsPrivate() {
+    selection = ColumnSelection_NONE;
+    dataStart = 0;
+    dataLength = std::numeric_limits<uint64_t>::max();
+    tailLocation = std::numeric_limits<uint64_t>::max();
+    throwOnHive11DecimalOverflow = true;
+    forcedScaleOnHive11Decimal = 6;
+    errorStream = &std::cerr;
+    memoryPool = dbcommon::getDefaultPool();
+    predicateExprs = nullptr;
+    td = nullptr;
+    readStatisticsOnly = false;
+  }
+};
+
+ReaderOptions::ReaderOptions()
+    : privateBits(
+          std::unique_ptr<ReaderOptionsPrivate>(new ReaderOptionsPrivate())) {
+  // PASS
+}
+
+ReaderOptions::ReaderOptions(const ReaderOptions& rhs)
+    : privateBits(std::unique_ptr<ReaderOptionsPrivate>(
+          new ReaderOptionsPrivate(*(rhs.privateBits.get())))) {
+  // PASS
+}
+
+ReaderOptions::ReaderOptions(ReaderOptions& rhs) {
+  // swap privateBits with rhs
+  ReaderOptionsPrivate* l = privateBits.release();
+  privateBits.reset(rhs.privateBits.release());
+  rhs.privateBits.reset(l);
+}
+
+ReaderOptions& ReaderOptions::operator=(const ReaderOptions& rhs) {
+  if (this != &rhs) {
+    privateBits.reset(new ReaderOptionsPrivate(*(rhs.privateBits.get())));
+  }
+  return *this;
+}
+
+ReaderOptions::~ReaderOptions() {
+  // PASS
+}
+
+ReaderOptions& ReaderOptions::include(const std::list<uint64_t>& include) {
+  privateBits->selection = ColumnSelection_FIELD_IDS;
+  privateBits->includedColumnIndexes.assign(include.begin(), include.end());
+  privateBits->includedColumnNames.clear();
+  return *this;
+}
+
+ReaderOptions& ReaderOptions::include(const std::list<std::string>& include) {
+  privateBits->selection = ColumnSelection_NAMES;
+  privateBits->includedColumnNames.assign(include.begin(), include.end());
+  privateBits->includedColumnIndexes.clear();
+  return *this;
+}
+
+ReaderOptions& ReaderOptions::includeTypes(const std::list<uint64_t>& types) {
+  privateBits->selection = ColumnSelection_TYPE_IDS;
+  privateBits->includedColumnIndexes.assign(types.begin(), types.end());
+  privateBits->includedColumnNames.clear();
+  return *this;
+}
+
+ReaderOptions& ReaderOptions::range(uint64_t offset, uint64_t length) {
+  privateBits->dataStart = offset;
+  privateBits->dataLength = length;
+  return *this;
+}
+
+ReaderOptions& ReaderOptions::setTailLocation(uint64_t offset) {
+  privateBits->tailLocation = offset;
+  return *this;
+}
+
+ReaderOptions& ReaderOptions::setSerializedFileTail(const std::string& value) {
+  privateBits->serializedTail = value;
+  return *this;
+}
+
+dbcommon::MemoryPool* ReaderOptions::getMemoryPool() const {
+  return privateBits->memoryPool;
+}
+
+bool ReaderOptions::getIndexesSet() const {
+  return privateBits->selection == ColumnSelection_FIELD_IDS;
+}
+
+bool ReaderOptions::getTypeIdsSet() const {
+  return privateBits->selection == ColumnSelection_TYPE_IDS;
+}
+
+const std::list<uint64_t>& ReaderOptions::getInclude() const {
+  return privateBits->includedColumnIndexes;
+}
+
+bool ReaderOptions::getNamesSet() const {
+  return privateBits->selection == ColumnSelection_NAMES;
+}
+
+const std::list<std::string>& ReaderOptions::getIncludeNames() const {
+  return privateBits->includedColumnNames;
+}
+
+uint64_t ReaderOptions::getOffset() const { return privateBits->dataStart; }
+
+uint64_t ReaderOptions::getLength() const { return privateBits->dataLength; }
+
+uint64_t ReaderOptions::getTailLocation() const {
+  return privateBits->tailLocation;
+}
+
+ReaderOptions& ReaderOptions::throwOnHive11DecimalOverflow(bool shouldThrow) {
+  privateBits->throwOnHive11DecimalOverflow = shouldThrow;
+  return *this;
+}
+
+bool ReaderOptions::getThrowOnHive11DecimalOverflow() const {
+  return privateBits->throwOnHive11DecimalOverflow;
+}
+
+ReaderOptions& ReaderOptions::forcedScaleOnHive11Decimal(int32_t forcedScale) {
+  privateBits->forcedScaleOnHive11Decimal = forcedScale;
+  return *this;
+}
+
+int32_t ReaderOptions::getForcedScaleOnHive11Decimal() const {
+  return privateBits->forcedScaleOnHive11Decimal;
+}
+
+ReaderOptions& ReaderOptions::setErrorStream(std::ostream& stream) {
+  privateBits->errorStream = &stream;
+  return *this;
+}
+
+std::ostream* ReaderOptions::getErrorStream() const {
+  return privateBits->errorStream;
+}
+
+std::string ReaderOptions::getSerializedFileTail() const {
+  return privateBits->serializedTail;
+}
+
+void ReaderOptions::setPredicateExprs(
+    const univplan::UnivPlanExprPolyList* predicateExprs) {
+  privateBits->predicateExprs = predicateExprs;
+}
+
+const univplan::UnivPlanExprPolyList* ReaderOptions::getPredicateExprs() const {
+  return privateBits->predicateExprs;
+}
+
+void ReaderOptions::setTupleDesc(const dbcommon::TupleDesc* td) {
+  privateBits->td = td;
+}
+
+const dbcommon::TupleDesc* ReaderOptions::getTupleDesc() const {
+  return privateBits->td;
+}
+
+void ReaderOptions::setReadStatsOnlyFlag(bool readStatsOnly) {
+  privateBits->readStatisticsOnly = readStatsOnly;
+}
+
+bool ReaderOptions::readStatsOnly() const {
+  return privateBits->readStatisticsOnly;
+}
+
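+// Illustrative usage sketch (hypothetical values; the column ids, range, and
+// tuple descriptor below are assumptions, not taken from real callers):
+//
+//   ReaderOptions opts;
+//   opts.include(std::list<uint64_t>{0, 2, 5});  // select columns by field id
+//   opts.range(stripeOffset, stripeLength);      // restrict to one byte range
+//   opts.setTupleDesc(tupleDesc);
+//   opts.setReadStatsOnlyFlag(false);
+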
+Reader::~Reader() {
+  // PASS
+}
+
+static const uint64_t DIRECTORY_SIZE_GUESS = 16 * 1024;
+
+uint64_t getCompressionBlockSize(const proto::PostScript& ps) {
+  if (ps.has_compressionblocksize()) {
+    return ps.compressionblocksize();
+  } else {
+    return 256 * 1024;
+  }
+}
+
+CompressionKind convertCompressionKind(const proto::PostScript& ps) {
+  if (ps.has_compression()) {
+    return static_cast<CompressionKind>(ps.compression());
+  } else {
+    LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Unknown compression type");
+  }
+}
+
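+// Fill buffer with bufferSize bytes by repeatedly pulling chunks from the
+// stream's Next() call.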
+void readFully(char* buffer, int64_t bufferSize, SeekableInputStream* stream) {
+  int64_t posn = 0;
+  while (posn < bufferSize) {
+    const void* chunk;
+    int length;
+    if (!stream->Next(&chunk, &length)) {
+      LOG_ERROR(ERRCODE_INTERNAL_ERROR, "bad read in readFully");
+    }
+    memcpy(buffer + posn, chunk, static_cast<size_t>(length));
+    posn += length;
+  }
+}
+
+ReaderImpl::ReaderImpl(std::unique_ptr<InputStream> input,
+                       const ReaderOptions& opts,
+                       std::unique_ptr<proto::PostScript> _postscript,
+                       std::unique_ptr<proto::Footer> _footer,
+                       uint64_t _fileLength, uint64_t _postscriptLength)
+    : localTimezone(getLocalTimezone()),
+      stream(std::move(input)),
+      options(opts),
+      fileLength(_fileLength),
+      postscriptLength(_postscriptLength),
+      postscript(std::move(_postscript)),
+      memoryPool(*opts.getMemoryPool()),
+      blockSize(getCompressionBlockSize(*postscript)),
+      compression(convertCompressionKind(*postscript)),
+      footer(std::move(_footer)),
+      firstRowOfStripe(memoryPool, 0) {
+  isMetadataLoaded = false;
+  checkOrcVersion();
+  numberOfStripes = static_cast<uint64_t>(footer->stripes_size());
+  currentStripe = static_cast<uint64_t>(footer->stripes_size());
+  lastStripe = 0;
+  currentRowInStripe = 0;
+  uint64_t rowTotal = 0;
+
+  firstRowOfStripe.resize(static_cast<uint64_t>(footer->stripes_size()));
+  for (size_t i = 0; i < static_cast<size_t>(footer->stripes_size()); ++i) {
+    firstRowOfStripe[i] = rowTotal;
+    proto::StripeInformation stripeInfo = footer->stripes(static_cast<int>(i));
+    rowTotal += stripeInfo.numberofrows();
+    bool isStripeInRange =
+        stripeInfo.offset() >= opts.getOffset() &&
+        stripeInfo.offset() < opts.getOffset() + opts.getLength();
+    if (isStripeInRange) {
+      if (i < currentStripe) {
+        currentStripe = i;
+      }
+      if (i >= lastStripe) {
+        lastStripe = i + 1;
+      }
+      // read all stripe footer in the range
+      if (!options.readStatsOnly())
+        stripeFooters.push_back(getStripeFooter(stripeInfo));
+    }
+  }
+  firstStripe = currentStripe;
+
+  if (currentStripe == 0) {
+    previousRow = (std::numeric_limits<uint64_t>::max)();
+  } else if (currentStripe == static_cast<uint64_t>(footer->stripes_size())) {
+    previousRow = footer->numberofrows();
+  } else {
+    previousRow = firstRowOfStripe[firstStripe] - 1;
+  }
+  if (numberOfStripes) {
+    schema = convertType(footer->types(0), *footer);
+    std::vector<std::string> columns;
... 20692 lines suppressed ...