You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/11/11 19:20:53 UTC
arrow git commit: ARROW-375: Fix unicode Python 3 issue in columns argument of parquet.read_table
Repository: arrow
Updated Branches:
refs/heads/master 7f048a4b8 -> 48f9780a8
ARROW-375: Fix unicode Python 3 issue in columns argument of parquet.read_table
Author: Wes McKinney <we...@twosigma.com>
Closes #204 from wesm/ARROW-375 and squashes the following commits:
9e6f2a6 [Wes McKinney] BUG: convert unicode to utf8 bytes for column filtering
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/48f9780a
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/48f9780a
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/48f9780a
Branch: refs/heads/master
Commit: 48f9780a8677546cb143a09b25b0b57c1946ba07
Parents: 7f048a4
Author: Wes McKinney <we...@twosigma.com>
Authored: Fri Nov 11 14:20:36 2016 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri Nov 11 14:20:36 2016 -0500
----------------------------------------------------------------------
python/pyarrow/parquet.pyx | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/48f9780a/python/pyarrow/parquet.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/parquet.pyx b/python/pyarrow/parquet.pyx
index 2152f89..a6e3ac3 100644
--- a/python/pyarrow/parquet.pyx
+++ b/python/pyarrow/parquet.pyx
@@ -93,15 +93,18 @@ cdef class ParquetReader:
Integer index of the position of the column
"""
cdef:
- const FileMetaData* metadata = self.reader.get().parquet_reader().metadata()
+ const FileMetaData* metadata = (self.reader.get()
+ .parquet_reader().metadata())
int i = 0
if self.column_idx_map is None:
self.column_idx_map = {}
for i in range(0, metadata.num_columns()):
- self.column_idx_map[str(metadata.schema().Column(i).path().get().ToDotString())] = i
+ col_bytes = tobytes(metadata.schema().Column(i)
+ .path().get().ToDotString())
+ self.column_idx_map[col_bytes] = i
- return self.column_idx_map[column_name]
+ return self.column_idx_map[tobytes(column_name)]
def read_column(self, int column_index):
cdef:
@@ -109,7 +112,8 @@ cdef class ParquetReader:
shared_ptr[CArray] carray
with nogil:
- check_status(self.reader.get().ReadFlatColumn(column_index, &carray))
+ check_status(self.reader.get()
+ .ReadFlatColumn(column_index, &carray))
array.init(carray)
return array