You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by nk...@apache.org on 2019/07/04 09:27:32 UTC

[avro] branch master updated: AVRO-2460: Add zstd codec support to the Python3 bindings (#575)

This is an automated email from the ASF dual-hosted git repository.

nkollar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new a49c342  AVRO-2460: Add zstd codec support to the Python3 bindings (#575)
a49c342 is described below

commit a49c3427694636280b5c09560863b031cd672d40
Author: Kengo Seki <se...@apache.org>
AuthorDate: Thu Jul 4 18:27:27 2019 +0900

    AVRO-2460: Add zstd codec support to the Python3 bindings (#575)
---
 lang/py3/avro/datafile.py            | 22 ++++++++++++++++++++++
 lang/py3/avro/tests/test_datafile.py |  7 ++++++-
 share/docker/Dockerfile              |  5 +++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/lang/py3/avro/datafile.py b/lang/py3/avro/datafile.py
index adb2429..1b2b322 100644
--- a/lang/py3/avro/datafile.py
+++ b/lang/py3/avro/datafile.py
@@ -34,6 +34,11 @@ try:
 except ImportError:
   has_snappy = False
 
+try:
+  import zstandard as zstd
+  has_zstandard = True
+except ImportError:
+  has_zstandard = False
 
 logger = logging.getLogger(__name__)
 
@@ -80,6 +85,8 @@ META_SCHEMA = schema.Parse("""
 VALID_CODECS = frozenset(['null', 'deflate'])
 if has_snappy:
   VALID_CODECS = frozenset.union(VALID_CODECS, ['snappy'])
+if has_zstandard:
+  VALID_CODECS = frozenset.union(VALID_CODECS, ['zstandard'])
 
 # Not used yet
 VALID_ENCODINGS = frozenset(['binary'])
@@ -272,6 +279,9 @@ class DataFileWriter(object):
     elif codec == 'snappy':
       compressed_data = snappy.compress(uncompressed_data)
       compressed_data_length = len(compressed_data) + 4 # crc32
+    elif codec == 'zstandard':
+      compressed_data = zstd.ZstdCompressor().compress(uncompressed_data)
+      compressed_data_length = len(compressed_data)
     else:
       fail_msg = '"%s" codec is not supported.' % codec
       raise DataFileException(fail_msg)
@@ -495,6 +505,18 @@ class DataFileReader(object):
       uncompressed = snappy.decompress(data)
       self._datum_decoder = avro_io.BinaryDecoder(io.BytesIO(uncompressed))
       self.raw_decoder.check_crc32(uncompressed);
+    elif self.codec == 'zstandard':
+      length = self.raw_decoder.read_long()
+      data = self.raw_decoder.read(length)
+      uncompressed = bytearray()
+      dctx = zstd.ZstdDecompressor()
+      with dctx.stream_reader(io.BytesIO(data)) as reader:
+        while True:
+          chunk = reader.read(16384)
+          if not chunk:
+            break
+          uncompressed.extend(chunk)
+      self._datum_decoder = avro_io.BinaryDecoder(io.BytesIO(uncompressed))
     else:
       raise DataFileException("Unknown codec: %r" % self.codec)
 
diff --git a/lang/py3/avro/tests/test_datafile.py b/lang/py3/avro/tests/test_datafile.py
index d5b07df..ceeab82 100644
--- a/lang/py3/avro/tests/test_datafile.py
+++ b/lang/py3/avro/tests/test_datafile.py
@@ -83,8 +83,13 @@ try:
   import snappy
   CODECS_TO_VALIDATE += ('snappy',)
 except ImportError:
-  logging.info('Snappy not present, will skip testing it.')
+  logging.warning('Snappy not present, will skip testing it.')
 
+try:
+  import zstandard
+  CODECS_TO_VALIDATE += ('zstandard',)
+except ImportError:
+  logging.warning('Zstandard not present, will skip testing it.')
 
 # ------------------------------------------------------------------------------
 
diff --git a/share/docker/Dockerfile b/share/docker/Dockerfile
index b9d10e6..89886ea 100644
--- a/share/docker/Dockerfile
+++ b/share/docker/Dockerfile
@@ -69,8 +69,10 @@ RUN apt-get -qq update && \
     python \
     python-setuptools \
     python-snappy \
+    python3-pip \
     python3-setuptools \
     python3-snappy \
+    python3-wheel \
     rake \
     ruby \
     ruby-dev \
@@ -91,6 +93,9 @@ RUN curl -L https://cpanmin.us | perl - --mirror https://www.cpan.org/ --self-up
 # Install PHPUnit
 RUN wget -O /usr/local/bin/phpunit https://phar.phpunit.de/phpunit-5.6.phar && chmod +x /usr/local/bin/phpunit
 
+# Install Python packages
+RUN pip3 install zstandard
+
 # Install Ruby modules
 RUN gem install echoe yajl-ruby multi_json snappy