You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/04/23 20:56:58 UTC
[arrow] branch master updated: ARROW-2470: [C++] Avoid seeking in
GetFileSize
This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new b65205e ARROW-2470: [C++] Avoid seeking in GetFileSize
b65205e is described below
commit b65205e32eaa5d8548b6f1a7f9893d3b2ebf8588
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Mon Apr 23 22:56:51 2018 +0200
ARROW-2470: [C++] Avoid seeking in GetFileSize
This makes GetFileSize thread-safe and also reduces its cost.
Author: Antoine Pitrou <an...@python.org>
Closes #1934 from pitrou/ARROW-2470-getfilesize and squashes the following commits:
52392075 <Antoine Pitrou> ARROW-2470: Avoid seeking in GetFileSize
---
cpp/src/arrow/io/io-file-test.cc | 27 ++++++++++++++++++++++++--
cpp/src/arrow/util/io-util.cc | 41 +++++++++++++++++++++++-----------------
python/pyarrow/tests/test_io.py | 2 ++
3 files changed, 51 insertions(+), 19 deletions(-)
diff --git a/cpp/src/arrow/io/io-file-test.cc b/cpp/src/arrow/io/io-file-test.cc
index b661fb6..40e24aa 100644
--- a/cpp/src/arrow/io/io-file-test.cc
+++ b/cpp/src/arrow/io/io-file-test.cc
@@ -322,6 +322,9 @@ TEST_F(TestReadableFile, SeekTellSize) {
ASSERT_OK(file_->GetSize(&size));
ASSERT_EQ(8, size);
+ ASSERT_OK(file_->Tell(&position));
+ ASSERT_EQ(100, position);
+
// does not support zero copy
ASSERT_FALSE(file_->supports_zero_copy());
}
@@ -526,6 +529,12 @@ TEST_F(TestPipeIO, TestWrite) {
ASSERT_EQ(bytes_read, 0);
}
+TEST_F(TestPipeIO, ReadableFileFails) {
+ // ReadableFile fails on non-seekable fd
+ std::shared_ptr<ReadableFile> file;
+ ASSERT_RAISES(IOError, ReadableFile::Open(r_, &file));
+}
+
// ----------------------------------------------------------------------
// Memory map tests
@@ -541,7 +550,7 @@ TEST_F(TestMemoryMappedFile, ZeroSizeFlie) {
std::shared_ptr<MemoryMappedFile> result;
ASSERT_OK(InitMemoryMap(0, path, &result));
- int64_t size = 0;
+ int64_t size = -1;
ASSERT_OK(result->Tell(&size));
ASSERT_EQ(0, size);
}
@@ -554,7 +563,7 @@ TEST_F(TestMemoryMappedFile, WriteRead) {
const int reps = 5;
- std::string path = "ipc-write-read-test";
+ std::string path = "io-memory-map-write-read-test";
std::shared_ptr<MemoryMappedFile> result;
ASSERT_OK(InitMemoryMap(reps * buffer_size, path, &result));
@@ -570,6 +579,20 @@ TEST_F(TestMemoryMappedFile, WriteRead) {
}
}
+TEST_F(TestMemoryMappedFile, GetSize) {
+ std::string path = "io-memory-map-get-size";
+ std::shared_ptr<MemoryMappedFile> result;
+ ASSERT_OK(InitMemoryMap(16384, path, &result));
+
+ int64_t size = -1;
+ ASSERT_OK(result->GetSize(&size));
+ ASSERT_EQ(16384, size);
+
+ int64_t position = -1;
+ ASSERT_OK(result->Tell(&position));
+ ASSERT_EQ(0, position);
+}
+
TEST_F(TestMemoryMappedFile, ReadOnly) {
const int64_t buffer_size = 1024;
std::vector<uint8_t> buffer(buffer_size);
diff --git a/cpp/src/arrow/util/io-util.cc b/cpp/src/arrow/util/io-util.cc
index 10a30df..03edc18 100644
--- a/cpp/src/arrow/util/io-util.cc
+++ b/cpp/src/arrow/util/io-util.cc
@@ -255,25 +255,32 @@ Status FileSeek(int fd, int64_t pos, int whence) {
Status FileSeek(int fd, int64_t pos) { return FileSeek(fd, pos, SEEK_SET); }
Status FileGetSize(int fd, int64_t* size) {
- int64_t ret;
-
- // XXX Should use fstat() instead, but this function also ensures the
- // file is seekable
-
- // Save current position
- int64_t current_position = lseek64_compat(fd, 0, SEEK_CUR);
- CHECK_LSEEK(current_position);
-
- // Move to end of the file, which returns the file length
- ret = lseek64_compat(fd, 0, SEEK_END);
- CHECK_LSEEK(ret);
-
- *size = ret;
+#if defined(_MSC_VER)
+ struct __stat64 st;
+#else
+ struct stat st;
+#endif
+ st.st_size = -1;
- // Restore file position
- ret = lseek64_compat(fd, current_position, SEEK_SET);
- CHECK_LSEEK(ret);
+#if defined(_MSC_VER)
+ int ret = _fstat64(fd, &st);
+#else
+ int ret = fstat(fd, &st);
+#endif
+ if (ret == -1) {
+ return Status::IOError("error stat()ing file");
+ }
+ if (st.st_size == 0) {
+ // Maybe the file doesn't support getting its size, double-check by
+ // trying to tell() (seekable files usually have a size, while
+ // non-seekable files don't)
+ int64_t position;
+ RETURN_NOT_OK(FileTell(fd, &position));
+ } else if (st.st_size < 0) {
+ return Status::IOError("error getting file size");
+ }
+ *size = st.st_size;
return Status::OK();
}
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 1511600..02851be 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -100,6 +100,8 @@ def test_python_file_read():
assert v == b'sample data'
assert len(v) == 11
+ assert f.size() == len(data)
+
f.close()
--
To stop receiving notification emails like this one, please contact
uwe@apache.org.