You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2017/05/04 03:44:18 UTC

parquet-cpp git commit: PARQUET-936: Return Invalid Status if chunk_size <= 0 when WriteTable in parquet-arrow

Repository: parquet-cpp
Updated Branches:
  refs/heads/master e414012a8 -> ecacbc9df


PARQUET-936: Return Invalid Status if chunk_size <= 0 when WriteTable in parquet-arrow

Author: Xianjin YE <ad...@gmail.com>

Closes #316 from advancedxy/PARQUET-936 and squashes the following commits:

9a47ec3 [Xianjin YE] Test chunk_size > 0 in WriteTable api.
24f8d48 [Xianjin YE] Return Invalid Status if chunk_size <= 0 when WriteTable in parquet-arrow.


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/ecacbc9d
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/ecacbc9d
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/ecacbc9d

Branch: refs/heads/master
Commit: ecacbc9df702372a1cec5ad9156ce117e82d5b1d
Parents: e414012
Author: Xianjin YE <ad...@gmail.com>
Authored: Wed May 3 23:44:11 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed May 3 23:44:11 2017 -0400

----------------------------------------------------------------------
 src/parquet/arrow/arrow-reader-writer-test.cc | 13 +++++++++++++
 src/parquet/arrow/writer.cc                   |  4 ++++
 2 files changed, 17 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/ecacbc9d/src/parquet/arrow/arrow-reader-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index 4c351b4..48790d9 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -1061,6 +1061,19 @@ TEST(TestArrowReadWrite, ReadColumnSubset) {
   ASSERT_TRUE(result->Equals(expected));
 }
 
+TEST(TestArrowWrite, CheckChunkSize) {
+  const int num_columns = 2;
+  const int num_rows = 128;
+  const int64_t chunk_size = 0; // deliberately zero: WriteTable below must raise Invalid
+  std::shared_ptr<Table> table;
+  MakeDoubleTable(num_columns, num_rows, 1, &table);
+
+  auto sink = std::make_shared<InMemoryOutputStream>();
+
+  ASSERT_RAISES(
+      Invalid, WriteTable(*table, ::arrow::default_memory_pool(), sink, chunk_size));
+}
+
 class TestNestedSchemaRead : public ::testing::Test {
  protected:
   virtual void SetUp() {

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/ecacbc9d/src/parquet/arrow/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc
index 2ebeb4a..631e16c 100644
--- a/src/parquet/arrow/writer.cc
+++ b/src/parquet/arrow/writer.cc
@@ -717,6 +717,10 @@ Status FileWriter::WriteTable(const Table& table, int64_t chunk_size) {
     }
   }
 
+  if (chunk_size <= 0) {
+    return Status::Invalid("chunk size per row_group must be greater than 0");
+  }
+
   for (int chunk = 0; chunk * chunk_size < table.num_rows(); chunk++) {
     int64_t offset = chunk * chunk_size;
     int64_t size = std::min(chunk_size, table.num_rows() - offset);