You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2018/07/14 06:01:30 UTC

[arrow] branch master updated: ARROW-2829: [GLib] Add GArrowORCFileReader

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2c90eff  ARROW-2829: [GLib] Add GArrowORCFileReader
2c90eff is described below

commit 2c90eff02c212ccf0c21a3f05b5371a801352e0f
Author: Kouhei Sutou <ko...@clear-code.com>
AuthorDate: Sat Jul 14 15:01:13 2018 +0900

    ARROW-2829: [GLib] Add GArrowORCFileReader
    
    c_glib/test/fixture/TestOrcFile.test1.orc is copied from
    https://github.com/apache/orc/blob/master/examples/TestOrcFile.test1.orc .
    Its license is Apache License 2.0.
    
    Author: Kouhei Sutou <ko...@clear-code.com>
    
    Closes #2250 from kou/glib-support-orc and squashes the following commits:
    
    db0404af [Kouhei Sutou] [GLib] Use ORC term not Arrow term for consistency
    f7682f79 [Kouhei Sutou] [GLib] Add GObject Introspection version check for old one
    43dd9ba6 [Kouhei Sutou] [GLib] Enable ORC on Travis CI
    4ac6407a [Kouhei Sutou] [deb] Add arrow-glib-orc.pc
    9c7ab775 [Kouhei Sutou] [GLib] Clean private related code
    61b882f5 [Kouhei Sutou] [GLib] Use accessor for field indexes for future API extension
    7f0c2243 [Kouhei Sutou] [GLib] Fix install error with Meson + ORC
    40aadaac [Kouhei Sutou] [GLib] Add GArrowORCFileReader
---
 .travis.yml                                        |   3 +
 c_glib/arrow-glib/Makefile.am                      |  20 +
 .../arrow-glib-orc.pc.in}                          |  40 +-
 c_glib/arrow-glib/meson.build                      |  24 ++
 c_glib/arrow-glib/orc-file-reader.cpp              | 407 +++++++++++++++++++++
 c_glib/arrow-glib/orc-file-reader.h                |  60 +++
 c_glib/arrow-glib/orc-file-reader.hpp              |  31 ++
 c_glib/configure.ac                                |  16 +
 c_glib/doc/reference/Makefile.am                   |   5 +
 c_glib/doc/reference/arrow-glib-docs.xml           |   1 +
 c_glib/doc/reference/meson.build                   |   7 +
 c_glib/meson.build                                 |   2 +
 c_glib/test/fixture/TestOrcFile.test1.orc          | Bin 0 -> 1711 bytes
 c_glib/test/{run-test.rb => helper/fixture.rb}     |  34 +-
 c_glib/test/run-test.rb                            |   1 +
 c_glib/test/test-orc-file-reader.rb                | 244 ++++++++++++
 .../debian/libarrow-glib-dev.install               |   1 +
 17 files changed, 834 insertions(+), 62 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 51253f2..24b32a4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -163,6 +163,7 @@ matrix:
     language: cpp
     os: linux
     env:
+    - ARROW_TRAVIS_ORC=1
     - BUILD_TORCH_EXAMPLE=no
     - CC="gcc-4.9"
     - CXX="g++-4.9"
@@ -179,6 +180,8 @@ matrix:
   - compiler: clang
     osx_image: xcode8.3
     os: osx
+    env:
+    - ARROW_TRAVIS_ORC=1
     cache:
     addons:
     rvm: 2.2
diff --git a/c_glib/arrow-glib/Makefile.am b/c_glib/arrow-glib/Makefile.am
index 845048d..7017143 100644
--- a/c_glib/arrow-glib/Makefile.am
+++ b/c_glib/arrow-glib/Makefile.am
@@ -82,6 +82,11 @@ libarrow_glib_la_headers +=			\
 libarrow_glib_la_headers +=			\
 	compute.h
 
+if HAVE_ARROW_ORC
+libarrow_glib_la_headers +=			\
+	orc-file-reader.h
+endif
+
 libarrow_glib_la_generated_headers =		\
 	enums.h					\
 	version.h
@@ -128,6 +133,11 @@ libarrow_glib_la_sources +=			\
 libarrow_glib_la_sources +=			\
 	compute.cpp
 
+if HAVE_ARROW_ORC
+libarrow_glib_la_sources +=			\
+	orc-file-reader.cpp
+endif
+
 libarrow_glib_la_cpp_headers =			\
 	array.hpp				\
 	array-builder.hpp			\
@@ -165,6 +175,11 @@ libarrow_glib_la_cpp_headers +=			\
 libarrow_glib_la_cpp_headers +=			\
 	compute.hpp
 
+if HAVE_ARROW_ORC
+libarrow_glib_la_cpp_headers +=			\
+	orc-file-reader.hpp
+endif
+
 libarrow_glib_la_SOURCES =			\
 	$(libarrow_glib_la_sources)		\
 	$(libarrow_glib_la_cpp_headers)
@@ -217,6 +232,11 @@ pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA =				\
 	arrow-glib.pc
 
+if HAVE_ARROW_ORC
+pkgconfig_DATA +=				\
+	arrow-glib-orc.pc
+endif
+
 if HAVE_INTROSPECTION
 -include $(INTROSPECTION_MAKEFILE)
 INTROSPECTION_GIRS =
diff --git a/c_glib/test/run-test.rb b/c_glib/arrow-glib/arrow-glib-orc.pc.in
old mode 100755
new mode 100644
similarity index 55%
copy from c_glib/test/run-test.rb
copy to c_glib/arrow-glib/arrow-glib-orc.pc.in
index 392c56f..5d22e14
--- a/c_glib/test/run-test.rb
+++ b/c_glib/arrow-glib/arrow-glib-orc.pc.in
@@ -1,5 +1,3 @@
-#!/usr/bin/env ruby
-#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -17,34 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 
-require "pathname"
-require "test-unit"
-
-base_dir = Pathname(__dir__).parent
-test_dir = base_dir + "test"
-
-require "gi"
-
-Gio = GI.load("Gio")
-Arrow = GI.load("Arrow")
-module Arrow
-  class Buffer
-    alias_method :initialize_raw, :initialize
-    def initialize(data)
-      initialize_raw(data)
-      @data = data
-    end
-  end
-end
-
-begin
-  ArrowGPU = GI.load("ArrowGPU")
-rescue GObjectIntrospection::RepositoryError::TypelibNotFound
-end
-
-require "rbconfig"
-require "tempfile"
-require_relative "helper/buildable"
-require_relative "helper/omittable"
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
 
-exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))
+Name: Apache Arrow GLib ORC
+Description: ORC modules for Apache Arrow GLib
+Version: @VERSION@
+Requires: arrow-glib
diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build
index 5e3dbe2..e5f4860 100644
--- a/c_glib/arrow-glib/meson.build
+++ b/c_glib/arrow-glib/meson.build
@@ -57,6 +57,12 @@ sources += files(
   'compute.cpp',
 )
 
+if arrow_orc_dependency.found()
+  sources += files(
+    'orc-file-reader.cpp',
+  )
+endif
+
 c_headers = files(
   'array.h',
   'array-builder.h',
@@ -102,6 +108,12 @@ c_headers += files(
   'compute.h',
 )
 
+if arrow_orc_dependency.found()
+  c_headers += files(
+    'orc-file-reader.h',
+  )
+endif
+
 
 cpp_headers = files(
   'array.hpp',
@@ -144,6 +156,11 @@ cpp_headers += files(
   'compute.hpp',
 )
 
+if arrow_orc_dependency.found()
+  cpp_headers += files(
+    'orc-file-reader.hpp',
+  )
+endif
 
 version_h_conf = configuration_data()
 version_h_conf.set('GARROW_VERSION_MAJOR', version_major)
@@ -198,6 +215,13 @@ pkgconfig.generate(filebase: meson.project_name(),
                    version: version,
                    requires: ['gio-2.0', 'arrow'],
                    libraries: [libarrow_glib])
+if arrow_orc_dependency.found()
+  pkgconfig.generate(filebase: 'arrow-glib-orc', # must not clash with arrow-glib.pc above
+                     name: 'Apache Arrow GLib ORC',
+                     description: 'ORC modules for Apache Arrow GLib',
+                     version: version,
+                     requires: ['arrow-glib'])
+endif
 
 arrow_glib_gir = gnome.generate_gir(libarrow_glib,
                                     sources: sources + c_headers + enums,
diff --git a/c_glib/arrow-glib/orc-file-reader.cpp b/c_glib/arrow-glib/orc-file-reader.cpp
new file mode 100644
index 0000000..87ba563
--- /dev/null
+++ b/c_glib/arrow-glib/orc-file-reader.cpp
@@ -0,0 +1,407 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/input-stream.hpp>
+#include <arrow-glib/orc-file-reader.hpp>
+#include <arrow-glib/record-batch.hpp>
+#include <arrow-glib/schema.hpp>
+#include <arrow-glib/table.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: orc-file-reader
+ * @section_id: orc-file-reader
+ * @title: ORC reader
+ * @include: arrow-glib/orc-file-reader.h
+ *
+ * #GArrowORCFileReader is a class for reading stripes in ORC file
+ * format from input.
+ */
+
+typedef struct GArrowORCFileReaderPrivate_ {
+  GArrowSeekableInputStream *input;
+  arrow::adapters::orc::ORCFileReader *orc_file_reader;
+  GArray *field_indexes;
+} GArrowORCFileReaderPrivate;
+
+enum {
+  PROP_0,
+  PROP_INPUT,
+  PROP_ORC_FILE_READER
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowORCFileReader,
+                           garrow_orc_file_reader,
+                           G_TYPE_OBJECT);
+
+#define GARROW_ORC_FILE_READER_GET_PRIVATE(obj)         \
+  static_cast<GArrowORCFileReaderPrivate *>(            \
+     garrow_orc_file_reader_get_instance_private(       \
+       GARROW_ORC_FILE_READER(obj)))
+
+static void
+garrow_orc_file_reader_dispose(GObject *object)
+{
+  auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
+
+  if (priv->input) {
+    g_object_unref(priv->input);
+    priv->input = NULL;
+  }
+
+  G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->dispose(object);
+}
+
+static void
+garrow_orc_file_reader_finalize(GObject *object)
+{
+  auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
+
+  delete priv->orc_file_reader;
+
+  if (priv->field_indexes) {
+    g_array_free(priv->field_indexes, TRUE);
+  }
+
+  G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->finalize(object);
+}
+
+static void
+garrow_orc_file_reader_set_property(GObject *object,
+                                    guint prop_id,
+                                    const GValue *value,
+                                    GParamSpec *pspec)
+{
+  auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_INPUT:
+    priv->input = GARROW_SEEKABLE_INPUT_STREAM(g_value_dup_object(value));
+    break;
+  case PROP_ORC_FILE_READER:
+    priv->orc_file_reader =
+      static_cast<arrow::adapters::orc::ORCFileReader *>(g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_orc_file_reader_get_property(GObject *object,
+                                    guint prop_id,
+                                    GValue *value,
+                                    GParamSpec *pspec)
+{
+  auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_INPUT:
+    g_value_set_object(value, priv->input);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_orc_file_reader_init(GArrowORCFileReader *object)
+{
+}
+
+static void
+garrow_orc_file_reader_class_init(GArrowORCFileReaderClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose      = garrow_orc_file_reader_dispose;
+  gobject_class->finalize     = garrow_orc_file_reader_finalize;
+  gobject_class->set_property = garrow_orc_file_reader_set_property;
+  gobject_class->get_property = garrow_orc_file_reader_get_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_object("input",
+                             "Input",
+                             "The input stream",
+                             GARROW_TYPE_SEEKABLE_INPUT_STREAM,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_INPUT, spec);
+
+  spec = g_param_spec_pointer("orc-file-reader",
+                              "arrow::adapters::orc::ORCFileReader",
+                              "The raw arrow::adapters::orc::ORCFileReader *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_ORC_FILE_READER, spec);
+}
+
+
+/**
+ * garrow_orc_file_reader_new:
+ * @file: The file to be read.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowORCFileReader
+ *   or %NULL on error.
+ *
+ * Since: 0.10.0
+ */
+GArrowORCFileReader *
+garrow_orc_file_reader_new(GArrowSeekableInputStream *input,
+                           GError **error)
+{
+  auto arrow_random_access_file = garrow_seekable_input_stream_get_raw(input);
+  auto pool = arrow::default_memory_pool();
+  std::unique_ptr<arrow::adapters::orc::ORCFileReader> arrow_reader;
+  auto status =
+    arrow::adapters::orc::ORCFileReader::Open(arrow_random_access_file,
+                                              pool,
+                                              &arrow_reader);
+  if (garrow_error_check(error, status, "[orc-file-reader][new]")) {
+    return garrow_orc_file_reader_new_raw(input, arrow_reader.release());
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * garrow_orc_file_reader_set_field_indexes:
+ * @reader: A #GArrowORCFileReader.
+ * @field_indexes: (nullable) (array length=n_field_indexes):
+ *   The field indexes to be read.
+ * @n_field_indexes: The number of the specified indexes.
+ *
+ * Since: 0.10.0
+ */
+void
+garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader,
+                                         const gint *field_indexes,
+                                         guint n_field_indexes)
+{
+  auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
+  if (priv->field_indexes) {
+    g_array_free(priv->field_indexes, TRUE);
+  }
+  if (n_field_indexes == 0) {
+    priv->field_indexes = NULL;
+  } else {
+    priv->field_indexes = g_array_sized_new(FALSE,
+                                            FALSE,
+                                            sizeof(gint),
+                                            n_field_indexes);
+    g_array_append_vals(priv->field_indexes, field_indexes, n_field_indexes);
+  }
+}
+
+/**
+ * garrow_orc_file_reader_get_field_indexes:
+ * @reader: A #GArrowORCFileReader.
+ * @n_field_indexes: The number of the specified indexes.
+ *
+ * Returns: (nullable) (array length=n_field_indexes) (transfer none):
+ *  The field indexes to be read.
+ *
+ * Since: 0.10.0
+ */
+const gint *
+garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader,
+                                         guint *n_field_indexes)
+{
+  auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
+  if (priv->field_indexes) {
+    *n_field_indexes = priv->field_indexes->len;
+    return reinterpret_cast<gint *>(priv->field_indexes->data);
+  } else {
+    *n_field_indexes = 0;
+    return NULL;
+  }
+}
+
+/**
+ * garrow_orc_file_reader_read_type:
+ * @reader: A #GArrowORCFileReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full): A newly read type as
+ *   #GArrowSchema or %NULL on error.
+ *
+ * Since: 0.10.0
+ */
+GArrowSchema *
+garrow_orc_file_reader_read_type(GArrowORCFileReader *reader,
+                                 GError **error)
+{
+  auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
+  std::shared_ptr<arrow::Schema> arrow_schema;
+  auto status = arrow_reader->ReadSchema(&arrow_schema);
+  if (garrow_error_check(error, status, "[orc-file-reader][read-type]")) {
+    return garrow_schema_new_raw(&arrow_schema);
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * garrow_orc_file_reader_read_stripes:
+ * @reader: A #GArrowORCFileReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full): The newly read stripes as
+ *   #GArrowTable or %NULL on error.
+ *
+ * Since: 0.10.0
+ */
+GArrowTable *
+garrow_orc_file_reader_read_stripes(GArrowORCFileReader *reader,
+                                    GError **error)
+{
+  auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
+  auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
+  if (priv->field_indexes) {
+    std::vector<int> arrow_field_indexes;
+    auto field_indexes = priv->field_indexes;
+    for (guint i = 0; i < field_indexes->len; ++i) {
+      arrow_field_indexes.push_back(g_array_index(field_indexes, gint, i));
+    }
+    std::shared_ptr<arrow::Table> arrow_table;
+    auto status = arrow_reader->Read(arrow_field_indexes, &arrow_table);
+    if (garrow_error_check(error, status, "[orc-file-reader][read-stripes]")) {
+      return garrow_table_new_raw(&arrow_table);
+    } else {
+      return NULL;
+    }
+  } else {
+    std::shared_ptr<arrow::Table> arrow_table;
+    auto status = arrow_reader->Read(&arrow_table);
+    if (garrow_error_check(error, status, "[orc-file-reader][read-stripes]")) {
+      return garrow_table_new_raw(&arrow_table);
+    } else {
+      return NULL;
+    }
+  }
+}
+
+/**
+ * garrow_orc_file_reader_read_stripe:
+ * @reader: A #GArrowORCFileReader.
+ * @i: The stripe index to be read.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full): A newly read stripe as
+ *   #GArrowRecordBatch or %NULL on error.
+ *
+ * Since: 0.10.0
+ */
+GArrowRecordBatch *
+garrow_orc_file_reader_read_stripe(GArrowORCFileReader *reader,
+                                   gint64 i,
+                                   GError **error)
+{
+  auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
+  if (i < 0) {
+    i += arrow_reader->NumberOfStripes();
+  }
+  auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
+  if (priv->field_indexes) {
+    std::vector<int> arrow_field_indexes;
+    auto field_indexes = priv->field_indexes;
+    for (guint j = 0; j < field_indexes->len; ++j) {
+      arrow_field_indexes.push_back(g_array_index(field_indexes, gint, j));
+    }
+    std::shared_ptr<arrow::RecordBatch> arrow_record_batch;
+    auto status = arrow_reader->ReadStripe(i,
+                                           arrow_field_indexes,
+                                           &arrow_record_batch);
+    if (garrow_error_check(error, status, "[orc-file-reader][read-stripe]")) {
+      return garrow_record_batch_new_raw(&arrow_record_batch);
+    } else {
+      return NULL;
+    }
+  } else {
+    std::shared_ptr<arrow::RecordBatch> arrow_record_batch;
+    auto status = arrow_reader->ReadStripe(i, &arrow_record_batch);
+    if (garrow_error_check(error, status, "[orc-file-reader][read-stripe]")) {
+      return garrow_record_batch_new_raw(&arrow_record_batch);
+    } else {
+      return NULL;
+    }
+  }
+}
+
+/**
+ * garrow_orc_file_reader_get_n_stripes:
+ * @reader: A #GArrowORCFileReader.
+ *
+ * Returns: The number of stripes in the file.
+ *
+ * Since: 0.10.0
+ */
+gint64
+garrow_orc_file_reader_get_n_stripes(GArrowORCFileReader *reader)
+{
+  auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
+  return arrow_reader->NumberOfStripes();
+}
+
+/**
+ * garrow_orc_file_reader_get_n_rows:
+ * @reader: A #GArrowORCFileReader.
+ *
+ * Returns: The number of rows in the file.
+ *
+ * Since: 0.10.0
+ */
+gint64
+garrow_orc_file_reader_get_n_rows(GArrowORCFileReader *reader)
+{
+  auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
+  return arrow_reader->NumberOfRows();
+}
+
+
+G_END_DECLS
+
+
+GArrowORCFileReader *
+garrow_orc_file_reader_new_raw(GArrowSeekableInputStream *input,
+                               arrow::adapters::orc::ORCFileReader *arrow_reader)
+{
+  auto reader =
+    GARROW_ORC_FILE_READER(g_object_new(GARROW_TYPE_ORC_FILE_READER,
+                                        "input", input,
+                                        "orc-file-reader", arrow_reader,
+                                        NULL));
+  return reader;
+}
+
+arrow::adapters::orc::ORCFileReader *
+garrow_orc_file_reader_get_raw(GArrowORCFileReader *reader)
+{
+  auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
+  return priv->orc_file_reader;
+}
diff --git a/c_glib/arrow-glib/orc-file-reader.h b/c_glib/arrow-glib/orc-file-reader.h
new file mode 100644
index 0000000..67fd8b0
--- /dev/null
+++ b/c_glib/arrow-glib/orc-file-reader.h
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-glib/reader.h>
+
+G_BEGIN_DECLS
+
+#define GARROW_TYPE_ORC_FILE_READER (garrow_orc_file_reader_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowORCFileReader,
+                         garrow_orc_file_reader,
+                         GARROW,
+                         ORC_FILE_READER,
+                         GObject)
+struct _GArrowORCFileReaderClass
+{
+  GObjectClass parent_class;
+};
+
+GArrowORCFileReader *
+garrow_orc_file_reader_new(GArrowSeekableInputStream *file,
+                           GError **error);
+void
+garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader,
+                                         const gint *field_indexes,
+                                         guint n_field_indexes);
+const gint *
+garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader,
+                                         guint *n_field_indexes);
+GArrowSchema *
+garrow_orc_file_reader_read_type(GArrowORCFileReader *reader,
+                                 GError **error);
+GArrowTable *
+garrow_orc_file_reader_read_stripes(GArrowORCFileReader *reader,
+                                    GError **error);
+GArrowRecordBatch *
+garrow_orc_file_reader_read_stripe(GArrowORCFileReader *reader,
+                                   gint64 i,
+                                   GError **error);
+gint64 garrow_orc_file_reader_get_n_stripes(GArrowORCFileReader *reader);
+gint64 garrow_orc_file_reader_get_n_rows(GArrowORCFileReader *reader);
+
+G_END_DECLS
diff --git a/c_glib/arrow-glib/orc-file-reader.hpp b/c_glib/arrow-glib/orc-file-reader.hpp
new file mode 100644
index 0000000..4171290
--- /dev/null
+++ b/c_glib/arrow-glib/orc-file-reader.hpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/adapters/orc/adapter.h>
+
+#include <arrow-glib/reader.hpp>
+#include <arrow-glib/orc-file-reader.h>
+
+GArrowORCFileReader *
+garrow_orc_file_reader_new_raw(GArrowSeekableInputStream *input,
+                               arrow::adapters::orc::ORCFileReader *arrow_reader);
+arrow::adapters::orc::ORCFileReader *
+garrow_orc_file_reader_get_raw(GArrowORCFileReader *reader);
diff --git a/c_glib/configure.ac b/c_glib/configure.ac
index d80cc9e..d6b7820 100644
--- a/c_glib/configure.ac
+++ b/c_glib/configure.ac
@@ -115,6 +115,10 @@ if test "x$GARROW_ARROW_CPP_BUILD_DIR" = "x"; then
   USE_ARROW_BUILD_DIR=no
 
   PKG_CHECK_MODULES([ARROW], [arrow arrow-compute])
+  PKG_CHECK_MODULES([ARROW_ORC],
+                    [arrow-orc],
+                    [HAVE_ARROW_ORC=yes],
+                    [HAVE_ARROW_ORC=no])
   PKG_CHECK_MODULES([ARROW_GPU],
                     [arrow-gpu],
                     [HAVE_ARROW_GPU=yes],
@@ -135,6 +139,12 @@ else
   AC_SUBST(ARROW_CFLAGS)
   AC_SUBST(ARROW_LIBS)
 
+  if test -f "${GARROW_ARROW_CPP_BUILD_DIR}/src/arrow/adapters/orc/arrow-orc.pc"; then
+    HAVE_ARROW_ORC=yes
+  else
+    HAVE_ARROW_ORC=no
+  fi
+
   ARROW_GPU_CFLAGS=""
   if test -f "${GARROW_ARROW_CPP_BUILD_DIR}/src/arrow/gpu/arrow-gpu.pc"; then
     HAVE_ARROW_GPU=yes
@@ -150,6 +160,11 @@ fi
 AM_CONDITIONAL([USE_ARROW_BUILD_DIR],
                [test "$USE_ARROW_BUILD_DIR" = "yes"])
 
+AM_CONDITIONAL([HAVE_ARROW_ORC], [test "$HAVE_ARROW_ORC" = "yes"])
+if test "$HAVE_ARROW_ORC" = "yes"; then
+  AC_DEFINE(HAVE_ARROW_ORC, [1], [Define to 1 if Apache Arrow supports ORC.])
+fi
+
 AM_CONDITIONAL([HAVE_ARROW_GPU], [test "$HAVE_ARROW_GPU" = "yes"])
 if test "$HAVE_ARROW_GPU" = "yes"; then
   AC_DEFINE(HAVE_ARROW_GPU, [1], [Define to 1 if Apache Arrow supports GPU.])
@@ -162,6 +177,7 @@ AC_CONFIG_FILES([
   Makefile
   arrow-glib/Makefile
   arrow-glib/arrow-glib.pc
+  arrow-glib/arrow-glib-orc.pc
   arrow-glib/version.h
   arrow-gpu-glib/Makefile
   arrow-gpu-glib/arrow-gpu-glib.pc
diff --git a/c_glib/doc/reference/Makefile.am b/c_glib/doc/reference/Makefile.am
index 6f916a6..ad0c938 100644
--- a/c_glib/doc/reference/Makefile.am
+++ b/c_glib/doc/reference/Makefile.am
@@ -36,6 +36,11 @@ HFILE_GLOB =					\
 
 IGNORE_HFILES =
 
+if !HAVE_ARROW_ORC
+IGNORE_HFILES +=					\
+	$(top_srcdir)/arrow-glib/orc-file-reader.h
+endif
+
 CFILE_GLOB =					\
 	$(top_srcdir)/arrow-glib/*.cpp
 
diff --git a/c_glib/doc/reference/arrow-glib-docs.xml b/c_glib/doc/reference/arrow-glib-docs.xml
index 2c75041..776a7b7 100644
--- a/c_glib/doc/reference/arrow-glib-docs.xml
+++ b/c_glib/doc/reference/arrow-glib-docs.xml
@@ -123,6 +123,7 @@
     <chapter id="reader">
       <title>Reader</title>
       <xi:include href="xml/reader.xml"/>
+      <xi:include href="xml/orc-file-reader.xml"><xi:fallback /></xi:include>
     </chapter>
     <chapter id="writer">
       <title>Writer</title>
diff --git a/c_glib/doc/reference/meson.build b/c_glib/doc/reference/meson.build
index 431aa0a..4f72424 100644
--- a/c_glib/doc/reference/meson.build
+++ b/c_glib/doc/reference/meson.build
@@ -59,10 +59,17 @@ if arrow_gpu_dependency.found()
     libarrow_gpu_glib_dependency,
   ]
 endif
+ignore_headers = []
+if not arrow_orc_dependency.found()
+  ignore_headers += [
+    join_paths(meson.source_root(), 'arrow-glib', 'orc-file-reader.h'),
+  ]
+endif
 gnome.gtkdoc(meson.project_name(),
              main_xml: meson.project_name() + '-docs.xml',
              src_dir: source_directories,
              dependencies: dependencies,
+             ignore_headers: ignore_headers,
              gobject_typesfile: meson.project_name() + '.types',
              scan_args: [
                '--rebuild-types',
diff --git a/c_glib/meson.build b/c_glib/meson.build
index 330f2bb..3f7846d 100644
--- a/c_glib/meson.build
+++ b/c_glib/meson.build
@@ -48,6 +48,8 @@ pkgconfig = import('pkgconfig')
 
 root_inc = include_directories('.')
 
+arrow_orc_dependency = dependency('arrow-orc', required: false)
+
 subdir('arrow-glib')
 arrow_gpu_dependency = dependency('arrow-gpu', required: false)
 if arrow_gpu_dependency.found()
diff --git a/c_glib/test/fixture/TestOrcFile.test1.orc b/c_glib/test/fixture/TestOrcFile.test1.orc
new file mode 100644
index 0000000..4fb0bef
Binary files /dev/null and b/c_glib/test/fixture/TestOrcFile.test1.orc differ
diff --git a/c_glib/test/run-test.rb b/c_glib/test/helper/fixture.rb
old mode 100755
new mode 100644
similarity index 57%
copy from c_glib/test/run-test.rb
copy to c_glib/test/helper/fixture.rb
index 392c56f..f07afd0
--- a/c_glib/test/run-test.rb
+++ b/c_glib/test/helper/fixture.rb
@@ -1,5 +1,3 @@
-#!/usr/bin/env ruby
-#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -17,34 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-require "pathname"
-require "test-unit"
-
-base_dir = Pathname(__dir__).parent
-test_dir = base_dir + "test"
-
-require "gi"
-
-Gio = GI.load("Gio")
-Arrow = GI.load("Arrow")
-module Arrow
-  class Buffer
-    alias_method :initialize_raw, :initialize
-    def initialize(data)
-      initialize_raw(data)
-      @data = data
+module Helper
+  module Fixture
+    def fixture_path(*components)
+      File.join(__dir__, "..", "fixture", *components)
     end
   end
 end
-
-begin
-  ArrowGPU = GI.load("ArrowGPU")
-rescue GObjectIntrospection::RepositoryError::TypelibNotFound
-end
-
-require "rbconfig"
-require "tempfile"
-require_relative "helper/buildable"
-require_relative "helper/omittable"
-
-exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))
diff --git a/c_glib/test/run-test.rb b/c_glib/test/run-test.rb
index 392c56f..366b083 100755
--- a/c_glib/test/run-test.rb
+++ b/c_glib/test/run-test.rb
@@ -45,6 +45,7 @@ end
 require "rbconfig"
 require "tempfile"
 require_relative "helper/buildable"
+require_relative "helper/fixture"
 require_relative "helper/omittable"
 
 exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))
diff --git a/c_glib/test/test-orc-file-reader.rb b/c_glib/test/test-orc-file-reader.rb
new file mode 100644
index 0000000..6b5c640
--- /dev/null
+++ b/c_glib/test/test-orc-file-reader.rb
@@ -0,0 +1,252 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Tests Arrow::ORCFileReader by reading the TestOrcFile.test1.orc fixture.
+# Every test is omitted when Arrow::ORCFileReader is not defined.
+class TestORCFileReader < Test::Unit::TestCase
+  include Helper::Omittable
+  include Helper::Fixture
+
+  # Opens the fixture through a memory mapped input stream and keeps the
+  # reader built on top of it in @reader.
+  def setup
+    unless Arrow.const_defined?(:ORCFileReader)
+      omit("Require Apache Arrow ORC")
+    end
+    orc_path = fixture_path("TestOrcFile.test1.orc")
+    orc_input = Arrow::MemoryMappedInputStream.new(orc_path)
+    @reader = Arrow::ORCFileReader.new(orc_input)
+  end
+
+  # Compares the text form of the type returned by read_type.
+  def test_read_type
+    expected_type = <<-SCHEMA.chomp
+boolean1: bool
+byte1: int8
+short1: int16
+int1: int32
+long1: int64
+float1: float
+double1: double
+bytes1: binary
+string1: string
+middle: struct<list: list<item: struct<int1: int32, string1: string>>>
+list: list<item: struct<int1: int32, string1: string>>
+map: list<item: struct<key: string, value: struct<int1: int32, string1: string>>>
+    SCHEMA
+    assert_equal(expected_type, @reader.read_type.to_s)
+  end
+
+  # NOTE(review): require_gi(1, 42, 0) presumably omits this test on
+  # GObject Introspection older than 1.42.0 -- confirm in Helper::Omittable.
+  def test_field_indexes
+    require_gi(1, 42, 0)
+    assert_nil(@reader.field_indexes)
+    @reader.set_field_indexes([1, 3])
+    assert_equal([1, 3], @reader.field_indexes)
+  end
+
+  sub_test_case("#read_stripes") do
+    test("all") do
+      dump = dump_table(@reader.read_stripes)
+      assert_equal([
+                     ["boolean1: bool", ["[false, true]"]],
+                     ["byte1: int8", ["[1, 100]"]],
+                     ["short1: int16", ["[1024, 2048]"]],
+                     ["int1: int32", ["[65536, 65536]"]],
+                     [
+                       "long1: int64",
+                       ["[9223372036854775807, 9223372036854775807]"],
+                     ],
+                     ["float1: float", ["[1, 2]"]],
+                     ["double1: double", ["[-15, -5]"]],
+                     ["bytes1: binary", ["[0001020304, ]"]],
+                     ["string1: string", ["[\"hi\", \"bye\"]"]],
+                     [
+                       "middle: " +
+                       "struct<list: " +
+                       "list<item: struct<int1: int32, string1: string>>>",
+                       [
+                         <<-STRUCT.chomp
+
+-- is_valid: all not null
+-- child 0 type: list<item: struct<int1: int32, string1: string>> values: 
+  -- is_valid: all not null
+  -- value_offsets: [0, 2, 4]
+  -- values: 
+    -- is_valid: all not null
+    -- child 0 type: int32 values: [1, 2, 1, 2]
+    -- child 1 type: string values: ["bye", "sigh", "bye", "sigh"]
+                          STRUCT
+                       ]
+                     ],
+                     [
+                       "list: list<item: struct<int1: int32, string1: string>>",
+                       [
+                         <<-LIST.chomp
+
+-- is_valid: all not null
+-- value_offsets: [0, 2, 5]
+-- values: 
+  -- is_valid: all not null
+  -- child 0 type: int32 values: [3, 4, 100000000, -100000, 1234]
+  -- child 1 type: string values: ["good", "bad", "cat", "in", "hat"]
+                         LIST
+                       ]
+                     ],
+                     [
+                       "map: list<item: " +
+                       "struct<key: string, value: " +
+                       "struct<int1: int32, string1: string>>>",
+                       [
+                         <<-MAP.chomp
+
+-- is_valid: all not null
+-- value_offsets: [0, 0, 2]
+-- values: 
+  -- is_valid: all not null
+  -- child 0 type: string values: ["chani", "mauddib"]
+  -- child 1 type: struct<int1: int32, string1: string> values: 
+    -- is_valid: all not null
+    -- child 0 type: int32 values: [5, 1]
+    -- child 1 type: string values: ["chani", "mauddib"]
+                         MAP
+                       ],
+                     ],
+                   ],
+                   dump)
+    end
+
+    test("select fields") do
+      # NOTE(review): indexes 1 and 3 yield the first and third fields;
+      # presumably index 0 is the ORC root struct -- confirm.
+      @reader.set_field_indexes([1, 3])
+      dump = dump_table(@reader.read_stripes)
+      assert_equal([
+                     ["boolean1: bool", ["[false, true]"]],
+                     ["short1: int16", ["[1024, 2048]"]],
+                   ],
+                   dump)
+    end
+  end
+
+  sub_test_case("#read_stripe") do
+    test("all") do
+      dump = dump_record_batch(@reader.read_stripe(0))
+      assert_equal([
+                     ["boolean1: bool", "[false, true]"],
+                     ["byte1: int8", "[1, 100]"],
+                     ["short1: int16", "[1024, 2048]"],
+                     ["int1: int32", "[65536, 65536]"],
+                     [
+                       "long1: int64",
+                       "[9223372036854775807, 9223372036854775807]",
+                     ],
+                     ["float1: float", "[1, 2]"],
+                     ["double1: double", "[-15, -5]"],
+                     ["bytes1: binary", "[0001020304, ]"],
+                     ["string1: string", "[\"hi\", \"bye\"]"],
+                     [
+                       "middle: " +
+                       "struct<list: " +
+                       "list<item: struct<int1: int32, string1: string>>>",
+                       <<-STRUCT.chomp
+
+-- is_valid: all not null
+-- child 0 type: list<item: struct<int1: int32, string1: string>> values: 
+  -- is_valid: all not null
+  -- value_offsets: [0, 2, 4]
+  -- values: 
+    -- is_valid: all not null
+    -- child 0 type: int32 values: [1, 2, 1, 2]
+    -- child 1 type: string values: ["bye", "sigh", "bye", "sigh"]
+                        STRUCT
+                     ],
+                     [
+                       "list: list<item: struct<int1: int32, string1: string>>",
+                       <<-LIST.chomp
+
+-- is_valid: all not null
+-- value_offsets: [0, 2, 5]
+-- values: 
+  -- is_valid: all not null
+  -- child 0 type: int32 values: [3, 4, 100000000, -100000, 1234]
+  -- child 1 type: string values: ["good", "bad", "cat", "in", "hat"]
+                       LIST
+                     ],
+                     [
+                       "map: list<item: " +
+                       "struct<key: string, value: " +
+                       "struct<int1: int32, string1: string>>>",
+                       <<-MAP.chomp
+
+-- is_valid: all not null
+-- value_offsets: [0, 0, 2]
+-- values: 
+  -- is_valid: all not null
+  -- child 0 type: string values: ["chani", "mauddib"]
+  -- child 1 type: struct<int1: int32, string1: string> values: 
+    -- is_valid: all not null
+    -- child 0 type: int32 values: [5, 1]
+    -- child 1 type: string values: ["chani", "mauddib"]
+                       MAP
+                     ],
+                   ],
+                   dump)
+    end
+
+    test("select fields") do
+      @reader.set_field_indexes([1, 3])
+      dump = dump_record_batch(@reader.read_stripe(0))
+      assert_equal([
+                     ["boolean1: bool", "[false, true]"],
+                     ["short1: int16", "[1024, 2048]"],
+                   ],
+                   dump)
+    end
+  end
+
+  def test_n_stripes
+    assert_equal(1, @reader.n_stripes)
+  end
+
+  def test_n_rows
+    assert_equal(2, @reader.n_rows)
+  end
+
+  private
+
+  # Turns a table into one [field text, [chunk text, ...]] pair per
+  # column so that it can be compared against plain literals.
+  def dump_table(table)
+    table.n_columns.times.map do |i|
+      column = table.get_column(i)
+      [column.field.to_s, column.data.chunks.map(&:to_s)]
+    end
+  end
+
+  # Turns a record batch into one [field text, column text] pair per
+  # column so that it can be compared against plain literals.
+  def dump_record_batch(record_batch)
+    record_batch.n_columns.times.map do |i|
+      [
+        record_batch.schema.get_field(i).to_s,
+        record_batch.get_column(i).to_s,
+      ]
+    end
+  end
+end
diff --git a/dev/tasks/linux-packages/debian/libarrow-glib-dev.install b/dev/tasks/linux-packages/debian/libarrow-glib-dev.install
index e59a1f9..461fbd4 100644
--- a/dev/tasks/linux-packages/debian/libarrow-glib-dev.install
+++ b/dev/tasks/linux-packages/debian/libarrow-glib-dev.install
@@ -2,5 +2,6 @@ usr/include/arrow-glib/
 usr/lib/*/libarrow-glib.a
 usr/lib/*/libarrow-glib.so
 usr/lib/*/pkgconfig/arrow-glib.pc
+usr/lib/*/pkgconfig/arrow-glib-orc.pc
 usr/share/gir-1.0/Arrow-1.0.gir
 usr/share/arrow-glib/example/