You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ko...@apache.org on 2019/10/28 19:36:47 UTC
[avro] branch master updated: AVRO-2432: Handle Empty Datafiles (#691)
This is an automated email from the ASF dual-hosted git repository.
kojiromike pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 7d87cde AVRO-2432: Handle Empty Datafiles (#691)
7d87cde is described below
commit 7d87cde06253913e9ef23c671e9ce092bc35dfe3
Author: Michael A. Smith <mi...@smith-li.com>
AuthorDate: Mon Oct 28 15:35:53 2019 -0400
AVRO-2432: Handle Empty Datafiles (#691)
---
lang/py/src/avro/datafile.py | 15 ++++-----------
lang/py/test/test_datafile.py | 14 ++++++++++++++
lang/py3/avro/datafile.py | 19 ++++---------------
lang/py3/avro/tests/test_datafile.py | 15 +++++++++++++++
4 files changed, 37 insertions(+), 26 deletions(-)
diff --git a/lang/py/src/avro/datafile.py b/lang/py/src/avro/datafile.py
index 75a8e4a..0d29a6a 100644
--- a/lang/py/src/avro/datafile.py
+++ b/lang/py/src/avro/datafile.py
@@ -373,21 +373,14 @@ class DataFileReader(object):
if proposed_sync_marker != self.sync_marker:
self.reader.seek(-SYNC_SIZE, 1)
return False
- else:
- return True
+ return True
- # TODO(hammer): handle block of length zero
- # TODO(hammer): clean this up with recursion
def next(self):
"""Return the next datum in the file."""
- if self.block_count == 0:
- if self.is_EOF():
+ while self.block_count == 0:
+ if self.is_EOF() or (self._skip_sync() and self.is_EOF()):
raise StopIteration
- elif self._skip_sync():
- if self.is_EOF(): raise StopIteration
- self._read_block_header()
- else:
- self._read_block_header()
+ self._read_block_header()
datum = self.datum_reader.read(self.datum_decoder)
self.block_count -= 1
diff --git a/lang/py/test/test_datafile.py b/lang/py/test/test_datafile.py
index bceb071..b020222 100644
--- a/lang/py/test/test_datafile.py
+++ b/lang/py/test/test_datafile.py
@@ -203,5 +203,19 @@ class TestDataFile(unittest.TestCase):
datums.append(datum)
self.assertTrue(reader.closed)
+ def test_empty_datafile(self):
+ """A reader should not fail to read a file consisting of a single empty block."""
+ sample_schema = schema.parse(SCHEMAS_TO_VALIDATE[1][0])
+ with datafile.DataFileWriter(open(FILENAME, 'wb'), io.DatumWriter(),
+ sample_schema) as dfw:
+ dfw.flush()
+ # Write an empty block
+ dfw.encoder.write_long(0)
+ dfw.encoder.write_long(0)
+ dfw.writer.write(dfw.sync_marker)
+
+ with datafile.DataFileReader(open(FILENAME, 'rb'), io.DatumReader()) as dfr:
+ self.assertEqual([], list(dfr))
+
if __name__ == '__main__':
unittest.main()
diff --git a/lang/py3/avro/datafile.py b/lang/py3/avro/datafile.py
index 6a3d46e..9a671f1 100644
--- a/lang/py3/avro/datafile.py
+++ b/lang/py3/avro/datafile.py
@@ -403,10 +403,6 @@ class DataFileReader(object):
def __iter__(self):
return self
- def __next__(self):
- """Implements the iterator interface."""
- return next(self)
-
# read-only properties
@property
def reader(self):
@@ -555,21 +551,14 @@ class DataFileReader(object):
if proposed_sync_marker != self.sync_marker:
self.reader.seek(-SYNC_SIZE, 1)
return False
- else:
- return True
+ return True
- # TODO: handle block of length zero
- # TODO: clean this up with recursion
def __next__(self):
"""Return the next datum in the file."""
- if self.block_count == 0:
- if self.is_EOF():
+ while self.block_count == 0:
+ if self.is_EOF() or (self._skip_sync() and self.is_EOF()):
raise StopIteration
- elif self._skip_sync():
- if self.is_EOF(): raise StopIteration
- self._read_block_header()
- else:
- self._read_block_header()
+ self._read_block_header()
datum = self.datum_reader.read(self.datum_decoder)
self._block_count -= 1
diff --git a/lang/py3/avro/tests/test_datafile.py b/lang/py3/avro/tests/test_datafile.py
index 3342098..9ad6a32 100644
--- a/lang/py3/avro/tests/test_datafile.py
+++ b/lang/py3/avro/tests/test_datafile.py
@@ -287,6 +287,21 @@ class TestDataFile(unittest.TestCase):
datums.append(datum)
self.assertTrue(reader.closed)
+ def test_empty_datafile(self):
+ """A reader should not fail to read a file consisting of a single empty block."""
+ file_path = self.NewTempFile()
+ sample_schema = schema.parse(SCHEMAS_TO_VALIDATE[1][0])
+ with datafile.DataFileWriter(open(file_path, 'wb'), io.DatumWriter(),
+ sample_schema) as dfw:
+ dfw.flush()
+ # Write an empty block
+ dfw.encoder.write_long(0)
+ dfw.encoder.write_long(0)
+ dfw.writer.write(dfw.sync_marker)
+
+ with datafile.DataFileReader(open(file_path, 'rb'), io.DatumReader()) as dfr:
+ self.assertEqual([], list(dfr))
+
# ------------------------------------------------------------------------------