You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/11/11 19:20:53 UTC

arrow git commit: ARROW-375: Fix unicode Python 3 issue in columns argument of parquet.read_table

Repository: arrow
Updated Branches:
  refs/heads/master 7f048a4b8 -> 48f9780a8


ARROW-375: Fix unicode Python 3 issue in columns argument of parquet.read_table

Author: Wes McKinney <we...@twosigma.com>

Closes #204 from wesm/ARROW-375 and squashes the following commits:

9e6f2a6 [Wes McKinney] BUG: convert unicode to utf8 bytes for column filtering


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/48f9780a
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/48f9780a
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/48f9780a

Branch: refs/heads/master
Commit: 48f9780a8677546cb143a09b25b0b57c1946ba07
Parents: 7f048a4
Author: Wes McKinney <we...@twosigma.com>
Authored: Fri Nov 11 14:20:36 2016 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri Nov 11 14:20:36 2016 -0500

----------------------------------------------------------------------
 python/pyarrow/parquet.pyx | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/48f9780a/python/pyarrow/parquet.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/parquet.pyx b/python/pyarrow/parquet.pyx
index 2152f89..a6e3ac3 100644
--- a/python/pyarrow/parquet.pyx
+++ b/python/pyarrow/parquet.pyx
@@ -93,15 +93,18 @@ cdef class ParquetReader:
             Integer index of the position of the column
         """
         cdef:
-            const FileMetaData* metadata = self.reader.get().parquet_reader().metadata()
+            const FileMetaData* metadata = (self.reader.get()
+                                            .parquet_reader().metadata())
             int i = 0
 
         if self.column_idx_map is None:
             self.column_idx_map = {}
             for i in range(0, metadata.num_columns()):
-                self.column_idx_map[str(metadata.schema().Column(i).path().get().ToDotString())] = i
+                col_bytes = tobytes(metadata.schema().Column(i)
+                                    .path().get().ToDotString())
+                self.column_idx_map[col_bytes] = i
 
-        return self.column_idx_map[column_name]
+        return self.column_idx_map[tobytes(column_name)]
 
     def read_column(self, int column_index):
         cdef:
@@ -109,7 +112,8 @@ cdef class ParquetReader:
             shared_ptr[CArray] carray
 
         with nogil:
-            check_status(self.reader.get().ReadFlatColumn(column_index, &carray))
+            check_status(self.reader.get()
+                         .ReadFlatColumn(column_index, &carray))
 
         array.init(carray)
         return array