You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by dk...@apache.org on 2020/05/21 14:11:15 UTC

[avro] branch master updated: AVRO-2547: Add bzip2 support to the Perl bindings

This is an automated email from the ASF dual-hosted git repository.

dkulp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new eb21a21  AVRO-2547: Add bzip2 support to the Perl bindings
eb21a21 is described below

commit eb21a2148b552d3df2db251558c7986ffb2bc833
Author: Kengo Seki <se...@apache.org>
AuthorDate: Wed Sep 4 13:58:10 2019 +0900

    AVRO-2547: Add bzip2 support to the Perl bindings
---
 lang/perl/lib/Avro.pm                |  2 +-
 lang/perl/lib/Avro/DataFile.pm       |  1 +
 lang/perl/lib/Avro/DataFileReader.pm | 17 ++++++++++++++---
 lang/perl/lib/Avro/DataFileWriter.pm |  9 +++++++++
 lang/perl/t/04_datafile.t            | 29 +++++++++++++++++++++++++++++
 5 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/lang/perl/lib/Avro.pm b/lang/perl/lib/Avro.pm
index f0ecfd1..8910951 100644
--- a/lang/perl/lib/Avro.pm
+++ b/lang/perl/lib/Avro.pm
@@ -18,7 +18,7 @@
 package Avro;
 
 use strict;
-use 5.008_001;
+use 5.010_001;
 our $VERSION = '++MODULE_VERSION++';
 
 1;
diff --git a/lang/perl/lib/Avro/DataFile.pm b/lang/perl/lib/Avro/DataFile.pm
index ac3e6da..6cdf434 100644
--- a/lang/perl/lib/Avro/DataFile.pm
+++ b/lang/perl/lib/Avro/DataFile.pm
@@ -38,6 +38,7 @@ EOH
 our %ValidCodec = (
     null      => 1,
     deflate   => 1,
+    bzip2     => 1,
     zstandard => 1,
 );
 
diff --git a/lang/perl/lib/Avro/DataFileReader.pm b/lang/perl/lib/Avro/DataFileReader.pm
index a51b159..0ce9181 100644
--- a/lang/perl/lib/Avro/DataFileReader.pm
+++ b/lang/perl/lib/Avro/DataFileReader.pm
@@ -36,6 +36,7 @@ use Avro::BinaryDecoder;
 use Avro::Schema;
 use Carp;
 use Compress::Zstd;
+use IO::Uncompress::Bunzip2 qw(bunzip2);
 use IO::Uncompress::RawInflate ;
 use Fcntl();
 
@@ -215,9 +216,19 @@ sub read_block_header {
     $datafile->{block_marker} = $marker;
 
     ## this is our new reader
-    $datafile->{reader} = $codec eq 'deflate' ?
-        IO::Uncompress::RawInflate->new(\$block) :
-        do { open $fh, '<', \(decompress(\$block)); $fh };
+    $datafile->{reader} = do {
+        if ($codec eq 'deflate') {
+            IO::Uncompress::RawInflate->new(\$block);
+        }
+        elsif ($codec eq 'bzip2') {
+            my $uncompressed;
+            bunzip2 \$block => \$uncompressed;
+            do { open $fh, '<', \$uncompressed; $fh };
+        }
+        elsif ($codec eq 'zstandard') {
+            do { open $fh, '<', \(decompress(\$block)); $fh };
+        }
+    };
 
     return;
 }
diff --git a/lang/perl/lib/Avro/DataFileWriter.pm b/lang/perl/lib/Avro/DataFileWriter.pm
index 74c5388..d45e2b0 100644
--- a/lang/perl/lib/Avro/DataFileWriter.pm
+++ b/lang/perl/lib/Avro/DataFileWriter.pm
@@ -37,6 +37,7 @@ use Avro::Schema;
 use Carp;
 use Compress::Zstd;
 use Error::Simple;
+use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error);
 use IO::Compress::RawDeflate qw(rawdeflate $RawDeflateError);
 
 our $VERSION = '++MODULE_VERSION++';
@@ -107,6 +108,14 @@ sub buffer_or_print {
         $datafile->{_current_size} =
             bytes::length($datafile->{_compressed_block});
     }
+    elsif ($codec eq 'bzip2') {
+        my $uncompressed = join('', map { $$_ } @$ser_objects);
+        my $compressed;
+        bzip2 \$uncompressed => \$compressed
+            or croak "bzip2 failed: $Bzip2Error";
+        $datafile->{_compressed_block} = $compressed;
+        $datafile->{_current_size} = bytes::length($datafile->{_compressed_block});
+    }
     elsif ($codec eq 'zstandard') {
         my $uncompressed = join('', map { $$_ } @$ser_objects);
         $datafile->{_compressed_block} = compress(\$uncompressed);
diff --git a/lang/perl/t/04_datafile.t b/lang/perl/t/04_datafile.t
index dd2ed1c..a22efc8 100644
--- a/lang/perl/t/04_datafile.t
+++ b/lang/perl/t/04_datafile.t
@@ -118,6 +118,35 @@ is_deeply $all[0], $data, "Our data is intact!";
     is scalar @all, 1, "one object back";
     is_deeply $all[0], $data, "Our data is intact!";
 
+
+    ## bzip2!
+    $zfh = File::Temp->new(UNLINK => 0);
+    $write_file = Avro::DataFileWriter->new(
+        fh            => $zfh,
+        writer_schema => $schema,
+        codec         => 'bzip2',
+        metadata      => {
+            some => 'metadata',
+        },
+    );
+    $write_file->print($data);
+    $write_file->flush;
+
+    ## rewind
+    seek $zfh, 0, 0;
+
+    $read_file = Avro::DataFileReader->new(
+        fh            => $zfh,
+        reader_schema => $schema,
+    );
+    is $read_file->metadata->{'avro.codec'}, 'bzip2', 'avro.codec';
+    is $read_file->metadata->{'some'}, 'metadata', 'custom meta';
+
+    @all = $read_file->all;
+    is scalar @all, 1, "one object back";
+    is_deeply $all[0], $data, "Our data is intact!";
+
+
     ## zstandard!
     $zfh = File::Temp->new(UNLINK => 0);
     $write_file = Avro::DataFileWriter->new(