You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by nk...@apache.org on 2019/07/04 09:27:32 UTC
[avro] branch master updated: AVRO-2460: Add zstd codec support to the Python3 bindings (#575)
This is an automated email from the ASF dual-hosted git repository.
nkollar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new a49c342 AVRO-2460: Add zstd codec support to the Python3 bindings (#575)
a49c342 is described below
commit a49c3427694636280b5c09560863b031cd672d40
Author: Kengo Seki <se...@apache.org>
AuthorDate: Thu Jul 4 18:27:27 2019 +0900
AVRO-2460: Add zstd codec support to the Python3 bindings (#575)
---
lang/py3/avro/datafile.py | 22 ++++++++++++++++++++++
lang/py3/avro/tests/test_datafile.py | 7 ++++++-
share/docker/Dockerfile | 5 +++++
3 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/lang/py3/avro/datafile.py b/lang/py3/avro/datafile.py
index adb2429..1b2b322 100644
--- a/lang/py3/avro/datafile.py
+++ b/lang/py3/avro/datafile.py
@@ -34,6 +34,11 @@ try:
except ImportError:
has_snappy = False
+try:
+ import zstandard as zstd
+ has_zstandard = True
+except ImportError:
+ has_zstandard = False
logger = logging.getLogger(__name__)
@@ -80,6 +85,8 @@ META_SCHEMA = schema.Parse("""
VALID_CODECS = frozenset(['null', 'deflate'])
if has_snappy:
VALID_CODECS = frozenset.union(VALID_CODECS, ['snappy'])
+if has_zstandard:
+ VALID_CODECS = frozenset.union(VALID_CODECS, ['zstandard'])
# Not used yet
VALID_ENCODINGS = frozenset(['binary'])
@@ -272,6 +279,9 @@ class DataFileWriter(object):
elif codec == 'snappy':
compressed_data = snappy.compress(uncompressed_data)
compressed_data_length = len(compressed_data) + 4 # crc32
+ elif codec == 'zstandard':
+ compressed_data = zstd.ZstdCompressor().compress(uncompressed_data)
+ compressed_data_length = len(compressed_data)
else:
fail_msg = '"%s" codec is not supported.' % codec
raise DataFileException(fail_msg)
@@ -495,6 +505,18 @@ class DataFileReader(object):
uncompressed = snappy.decompress(data)
self._datum_decoder = avro_io.BinaryDecoder(io.BytesIO(uncompressed))
self.raw_decoder.check_crc32(uncompressed);
+ elif self.codec == 'zstandard':
+ length = self.raw_decoder.read_long()
+ data = self.raw_decoder.read(length)
+ uncompressed = bytearray()
+ dctx = zstd.ZstdDecompressor()
+ with dctx.stream_reader(io.BytesIO(data)) as reader:
+ while True:
+ chunk = reader.read(16384)
+ if not chunk:
+ break
+ uncompressed.extend(chunk)
+ self._datum_decoder = avro_io.BinaryDecoder(io.BytesIO(uncompressed))
else:
raise DataFileException("Unknown codec: %r" % self.codec)
diff --git a/lang/py3/avro/tests/test_datafile.py b/lang/py3/avro/tests/test_datafile.py
index d5b07df..ceeab82 100644
--- a/lang/py3/avro/tests/test_datafile.py
+++ b/lang/py3/avro/tests/test_datafile.py
@@ -83,8 +83,13 @@ try:
import snappy
CODECS_TO_VALIDATE += ('snappy',)
except ImportError:
- logging.info('Snappy not present, will skip testing it.')
+ logging.warning('Snappy not present, will skip testing it.')
+try:
+ import zstandard
+ CODECS_TO_VALIDATE += ('zstandard',)
+except ImportError:
+ logging.warning('Zstandard not present, will skip testing it.')
# ------------------------------------------------------------------------------
diff --git a/share/docker/Dockerfile b/share/docker/Dockerfile
index b9d10e6..89886ea 100644
--- a/share/docker/Dockerfile
+++ b/share/docker/Dockerfile
@@ -69,8 +69,10 @@ RUN apt-get -qq update && \
python \
python-setuptools \
python-snappy \
+ python3-pip \
python3-setuptools \
python3-snappy \
+ python3-wheel \
rake \
ruby \
ruby-dev \
@@ -91,6 +93,9 @@ RUN curl -L https://cpanmin.us | perl - --mirror https://www.cpan.org/ --self-up
# Install PHPUnit
RUN wget -O /usr/local/bin/phpunit https://phar.phpunit.de/phpunit-5.6.phar && chmod +x /usr/local/bin/phpunit
+# Install Python packages
+RUN pip3 install zstandard
+
# Install Ruby modules
RUN gem install echoe yajl-ruby multi_json snappy