You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by fo...@apache.org on 2019/07/04 09:22:34 UTC
[avro] branch master updated: AVRO-2446: Add deflate codec support
to the PHP bindings (#561)
This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new b640728 AVRO-2446: Add deflate codec support to the PHP bindings (#561)
b640728 is described below
commit b6407285171e6c08172b2239b0c989a17f6be628
Author: Kengo Seki <se...@apache.org>
AuthorDate: Thu Jul 4 18:22:27 2019 +0900
AVRO-2446: Add deflate codec support to the PHP bindings (#561)
* AVRO-2446: Add deflate codec support to the PHP bindings
* AVRO-2446: Add deflate codec support to the PHP bindings
Update lang/php/test/generate_interop_data.php to generate
a deflate-compressed file in addition to an uncompressed one,
so that we can confirm the write operation is properly implemented
via other bindings.
* AVRO-2446: Add deflate codec support to the PHP bindings
Add the default value to the open_writer function,
for maintaining backward compatibility.
---
lang/php/lib/avro/data_file.php | 68 +++++---
lang/php/test/DataFileTest.php | 278 +++++++++++++++++---------------
lang/php/test/InterOpTest.php | 6 +-
lang/php/test/generate_interop_data.php | 11 +-
4 files changed, 196 insertions(+), 167 deletions(-)
diff --git a/lang/php/lib/avro/data_file.php b/lang/php/lib/avro/data_file.php
index 563d4cf..4271897 100644
--- a/lang/php/lib/avro/data_file.php
+++ b/lang/php/lib/avro/data_file.php
@@ -75,10 +75,8 @@ class AvroDataIO
/**
* @var array array of valid codec names
- * @todo Avro implementations are required to implement deflate codec as well,
- * so implement it already!
*/
- private static $valid_codecs = array(self::NULL_CODEC);
+ private static $valid_codecs = array(self::NULL_CODEC, self::DEFLATE_CODEC);
/**
* @var AvroSchema cached version of metadata schema object
@@ -111,13 +109,14 @@ class AvroDataIO
* @param string $file_path file_path of file to open
* @param string $mode one of AvroFile::READ_MODE or AvroFile::WRITE_MODE
* @param string $schema_json JSON of writer's schema
+ * @param string $codec compression codec
* @returns AvroDataIOWriter instance of AvroDataIOWriter
*
* @throws AvroDataIOException if $writers_schema is not provided
* or if an invalid $mode is given.
*/
public static function open_file($file_path, $mode=AvroFile::READ_MODE,
- $schema_json=null)
+ $schema_json=null, $codec=self::NULL_CODEC)
{
$schema = !is_null($schema_json)
? AvroSchema::parse($schema_json) : null;
@@ -129,7 +128,7 @@ class AvroDataIO
if (is_null($schema))
throw new AvroDataIOException('Writing an Avro file requires a schema.');
$file = new AvroFile($file_path, AvroFile::WRITE_MODE);
- $io = self::open_writer($file, $schema);
+ $io = self::open_writer($file, $schema, $codec);
break;
case AvroFile::READ_MODE:
$file = new AvroFile($file_path, AvroFile::READ_MODE);
@@ -146,7 +145,7 @@ class AvroDataIO
/**
* @returns array array of valid codecs
*/
- private static function valid_codecs()
+ public static function valid_codecs()
{
return self::$valid_codecs;
}
@@ -163,12 +162,13 @@ class AvroDataIO
/**
* @param AvroIO $io
* @param AvroSchema $schema
+ * @param string $codec
* @returns AvroDataIOWriter
*/
- protected function open_writer($io, $schema)
+ protected function open_writer($io, $schema, $codec=self::NULL_CODEC)
{
$writer = new AvroIODatumWriter($schema);
- return new AvroDataIOWriter($io, $writer, $schema);
+ return new AvroDataIOWriter($io, $writer, $schema, $codec);
}
/**
@@ -222,10 +222,17 @@ class AvroDataIOReader
private $block_count;
/**
+ * @var string compression codec
+ */
+ private $codec;
+
+ /**
* @param AvroIO $io source from which to read
* @param AvroIODatumReader $datum_reader reader that understands
* the data schema
* @throws AvroDataIOException if $io is not an instance of AvroIO
+ * or the codec specified in the header
+ * is not supported
* @uses read_header()
*/
public function __construct($io, $datum_reader)
@@ -243,6 +250,7 @@ class AvroDataIOReader
AvroDataIO::METADATA_CODEC_ATTR);
if ($codec && !AvroDataIO::is_valid_codec($codec))
throw new AvroDataIOException(sprintf('Unknown codec: %s', $codec));
+ $this->codec = $codec;
$this->block_count = 0;
// FIXME: Seems unsanitary to set writers_schema here.
@@ -294,9 +302,15 @@ class AvroDataIOReader
if ($this->is_eof())
break;
- $this->read_block_header();
+ $length = $this->read_block_header();
+ $decoder = $this->decoder;
+ if ($this->codec == AvroDataIO::DEFLATE_CODEC) {
+ $compressed = $decoder->read($length);
+ $datum = gzinflate($compressed);
+ $decoder = new AvroIOBinaryDecoder(new AvroStringIO($datum));
+ }
}
- $data []= $this->datum_reader->read($this->decoder);
+ $data []= $this->datum_reader->read($decoder);
$this->block_count -= 1;
}
return $data;
@@ -406,11 +420,17 @@ class AvroDataIOWriter
private $metadata;
/**
+ * @var string compression codec
+ */
+ private $codec;
+
+ /**
* @param AvroIO $io
* @param AvroIODatumWriter $datum_writer
* @param AvroSchema $writers_schema
+ * @param string $codec
*/
- public function __construct($io, $datum_writer, $writers_schema=null)
+ public function __construct($io, $datum_writer, $writers_schema=null, $codec=AvroDataIO::NULL_CODEC)
{
if (!($io instanceof AvroIO))
throw new AvroDataIOException('io must be instance of AvroIO');
@@ -425,8 +445,12 @@ class AvroDataIOWriter
if ($writers_schema)
{
+ if (!AvroDataIO::is_valid_codec($codec))
+ throw new AvroDataIOException(
+ sprintf('codec %s is not supported', $codec));
+
$this->sync_marker = self::generate_sync_marker();
- $this->metadata[AvroDataIO::METADATA_CODEC_ATTR] = AvroDataIO::NULL_CODEC;
+ $this->metadata[AvroDataIO::METADATA_CODEC_ATTR] = $this->codec = $codec;
$this->metadata[AvroDataIO::METADATA_SCHEMA_ATTR] = strval($writers_schema);
$this->write_header();
}
@@ -434,8 +458,8 @@ class AvroDataIOWriter
{
$dfr = new AvroDataIOReader($this->io, new AvroIODatumReader());
$this->sync_marker = $dfr->sync_marker;
- $this->metadata[AvroDataIO::METADATA_CODEC_ATTR] = $dfr->metadata[AvroDataIO::METADATA_CODEC_ATTR];
-
+ $this->metadata[AvroDataIO::METADATA_CODEC_ATTR] = $this->codec
+ = $dfr->metadata[AvroDataIO::METADATA_CODEC_ATTR];
$schema_from_file = $dfr->metadata[AvroDataIO::METADATA_SCHEMA_ATTR];
$this->metadata[AvroDataIO::METADATA_SCHEMA_ATTR] = $schema_from_file;
$this->datum_writer->writers_schema = AvroSchema::parse($schema_from_file);
@@ -479,10 +503,6 @@ class AvroDataIOWriter
/**
* Writes a block of data to the AvroIO object container.
- * @throws AvroDataIOException if the codec provided by the encoder
- * is not supported
- * @internal Should the codec check happen in the constructor?
- * Why wait until we're writing data?
*/
private function write_block()
{
@@ -490,16 +510,12 @@ class AvroDataIOWriter
{
$this->encoder->write_long($this->block_count);
$to_write = strval($this->buffer);
- $this->encoder->write_long(strlen($to_write));
- if (AvroDataIO::is_valid_codec(
- $this->metadata[AvroDataIO::METADATA_CODEC_ATTR]))
- $this->write($to_write);
- else
- throw new AvroDataIOException(
- sprintf('codec %s is not supported',
- $this->metadata[AvroDataIO::METADATA_CODEC_ATTR]));
+ if ($this->codec == AvroDataIO::DEFLATE_CODEC)
+ $to_write = gzdeflate($to_write);
+ $this->encoder->write_long(strlen($to_write));
+ $this->write($to_write);
$this->write($this->sync_marker);
$this->buffer->truncate();
$this->block_count = 0;
diff --git a/lang/php/test/DataFileTest.php b/lang/php/test/DataFileTest.php
index 241f667..703e3d2 100644
--- a/lang/php/test/DataFileTest.php
+++ b/lang/php/test/DataFileTest.php
@@ -56,6 +56,7 @@ class DataFileTest extends PHPUnit_Framework_TestCase
mkdir(TEST_TEMP_DIR);
$this->remove_data_files();
}
+
protected function tearDown()
{
$this->remove_data_files();
@@ -63,118 +64,131 @@ class DataFileTest extends PHPUnit_Framework_TestCase
public function test_write_read_nothing_round_trip()
{
- $data_file = $this->add_data_file('data-wr-nothing-null.avr');
- $writers_schema = '"null"';
- $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema);
- $dw->close();
-
- $dr = AvroDataIO::open_file($data_file);
- $read_data = array_shift($dr->data());
- $dr->close();
- $this->assertEquals(null, $read_data);
+ foreach (AvroDataIO::valid_codecs() as $codec) {
+ $data_file = $this->add_data_file(sprintf('data-wr-nothing-null-%s.avr', $codec));
+ $writers_schema = '"null"';
+ $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema, $codec);
+ $dw->close();
+
+ $dr = AvroDataIO::open_file($data_file);
+ $read_data = array_shift($dr->data());
+ $dr->close();
+ $this->assertEquals(null, $read_data);
+ }
}
public function test_write_read_null_round_trip()
{
- $data_file = $this->add_data_file('data-wr-null.avr');
- $writers_schema = '"null"';
- $data = null;
- $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema);
- $dw->append($data);
- $dw->close();
-
- $dr = AvroDataIO::open_file($data_file);
- $read_data = array_shift($dr->data());
- $dr->close();
- $this->assertEquals($data, $read_data);
+ foreach (AvroDataIO::valid_codecs() as $codec) {
+ $data_file = $this->add_data_file(sprintf('data-wr-null-%s.avr', $codec));
+ $writers_schema = '"null"';
+ $data = null;
+ $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema, $codec);
+ $dw->append($data);
+ $dw->close();
+
+ $dr = AvroDataIO::open_file($data_file);
+ $read_data = array_shift($dr->data());
+ $dr->close();
+ $this->assertEquals($data, $read_data);
+ }
}
public function test_write_read_string_round_trip()
{
- $data_file = $this->add_data_file('data-wr-str.avr');
- $writers_schema = '"string"';
- $data = 'foo';
- $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema);
- $dw->append($data);
- $dw->close();
-
- $dr = AvroDataIO::open_file($data_file);
- $read_data = array_shift($dr->data());
- $dr->close();
- $this->assertEquals($data, $read_data);
+ foreach (AvroDataIO::valid_codecs() as $codec) {
+ $data_file = $this->add_data_file(sprintf('data-wr-str-%s.avr', $codec));
+ $writers_schema = '"string"';
+ $data = 'foo';
+ $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema, $codec);
+ $dw->append($data);
+ $dw->close();
+
+ $dr = AvroDataIO::open_file($data_file);
+ $read_data = array_shift($dr->data());
+ $dr->close();
+ $this->assertEquals($data, $read_data);
+ }
}
-
public function test_write_read_round_trip()
{
+ foreach (AvroDataIO::valid_codecs() as $codec) {
+ $data_file = $this->add_data_file(sprintf('data-wr-int-%s.avr', $codec));
+ $writers_schema = '"int"';
+ $data = 1;
- $data_file = $this->add_data_file('data-wr-int.avr');
- $writers_schema = '"int"';
- $data = 1;
-
- $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema);
- $dw->append(1);
- $dw->close();
-
- $dr = AvroDataIO::open_file($data_file);
- $read_data = array_shift($dr->data());
- $dr->close();
- $this->assertEquals($data, $read_data);
+ $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema, $codec);
+ $dw->append(1);
+ $dw->close();
+ $dr = AvroDataIO::open_file($data_file);
+ $read_data = array_shift($dr->data());
+ $dr->close();
+ $this->assertEquals($data, $read_data);
+ }
}
public function test_write_read_true_round_trip()
{
- $data_file = $this->add_data_file('data-wr-true.avr');
- $writers_schema = '"boolean"';
- $datum = true;
- $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema);
- $dw->append($datum);
- $dw->close();
+ foreach (AvroDataIO::valid_codecs() as $codec) {
+ $data_file = $this->add_data_file(sprintf('data-wr-true-%s.avr', $codec));
+ $writers_schema = '"boolean"';
+ $datum = true;
+ $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema, $codec);
+ $dw->append($datum);
+ $dw->close();
- $dr = AvroDataIO::open_file($data_file);
- $read_datum = array_shift($dr->data());
- $dr->close();
- $this->assertEquals($datum, $read_datum);
+ $dr = AvroDataIO::open_file($data_file);
+ $read_datum = array_shift($dr->data());
+ $dr->close();
+ $this->assertEquals($datum, $read_datum);
+ }
}
public function test_write_read_false_round_trip()
{
- $data_file = $this->add_data_file('data-wr-false.avr');
- $writers_schema = '"boolean"';
- $datum = false;
- $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema);
- $dw->append($datum);
- $dw->close();
+ foreach (AvroDataIO::valid_codecs() as $codec) {
+ $data_file = $this->add_data_file(sprintf('data-wr-false-%s.avr', $codec));
+ $writers_schema = '"boolean"';
+ $datum = false;
+ $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema, $codec);
+ $dw->append($datum);
+ $dw->close();
- $dr = AvroDataIO::open_file($data_file);
- $read_datum = array_shift($dr->data());
- $dr->close();
- $this->assertEquals($datum, $read_datum);
+ $dr = AvroDataIO::open_file($data_file);
+ $read_datum = array_shift($dr->data());
+ $dr->close();
+ $this->assertEquals($datum, $read_datum);
+ }
}
+
public function test_write_read_int_array_round_trip()
{
- $data_file = $this->add_data_file('data-wr-int-ary.avr');
- $writers_schema = '"int"';
- $data = array(10, 20, 30, 40, 50, 60, 70);
- $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema);
- foreach ($data as $datum)
- $dw->append($datum);
- $dw->close();
-
- $dr = AvroDataIO::open_file($data_file);
- $read_data = $dr->data();
- $dr->close();
- $this->assertEquals($data, $read_data,
- sprintf("in: %s\nout: %s",
- json_encode($data), json_encode($read_data)));
+ foreach (AvroDataIO::valid_codecs() as $codec) {
+ $data_file = $this->add_data_file(sprintf('data-wr-int-ary-%s.avr', $codec));
+ $writers_schema = '"int"';
+ $data = array(10, 20, 30, 40, 50, 60, 70);
+ $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema, $codec);
+ foreach ($data as $datum)
+ $dw->append($datum);
+ $dw->close();
+
+ $dr = AvroDataIO::open_file($data_file);
+ $read_data = $dr->data();
+ $dr->close();
+ $this->assertEquals($data, $read_data,
+ sprintf("in: %s\nout: %s",
+ json_encode($data), json_encode($read_data)));
+ }
}
public function test_differing_schemas_with_primitives()
{
- $data_file = $this->add_data_file('data-prim.avr');
+ foreach (AvroDataIO::valid_codecs() as $codec) {
+ $data_file = $this->add_data_file('data-prim-%s.avr', $codec);
- $writer_schema = <<<JSON
+ $writer_schema = <<<JSON
{ "type": "record",
"name": "User",
"fields" : [
@@ -183,33 +197,35 @@ class DataFileTest extends PHPUnit_Framework_TestCase
{"name": "verified", "type": "boolean", "default": "false"}
]}
JSON;
- $data = array(array('username' => 'john', 'age' => 25, 'verified' => true),
- array('username' => 'ryan', 'age' => 23, 'verified' => false));
- $dw = AvroDataIO::open_file($data_file, 'w', $writer_schema);
- foreach ($data as $datum)
- {
- $dw->append($datum);
- }
- $dw->close();
- $reader_schema = <<<JSON
- { "type": "record",
- "name": "User",
- "fields" : [
- {"name": "username", "type": "string"}
- ]}
+ $data = array(array('username' => 'john', 'age' => 25, 'verified' => true),
+ array('username' => 'ryan', 'age' => 23, 'verified' => false));
+ $dw = AvroDataIO::open_file($data_file, 'w', $writer_schema, $codec);
+ foreach ($data as $datum)
+ {
+ $dw->append($datum);
+ }
+ $dw->close();
+ $reader_schema = <<<JSON
+ { "type": "record",
+ "name": "User",
+ "fields" : [
+ {"name": "username", "type": "string"}
+ ]}
JSON;
- $dr = AvroDataIO::open_file($data_file, 'r', $reader_schema);
- foreach ($dr->data() as $index => $record)
- {
- $this->assertEquals($data[$index]['username'], $record['username']);
+ $dr = AvroDataIO::open_file($data_file, 'r', $reader_schema);
+ foreach ($dr->data() as $index => $record)
+ {
+ $this->assertEquals($data[$index]['username'], $record['username']);
+ }
}
}
public function test_differing_schemas_with_complex_objects()
{
- $data_file = $this->add_data_file('data-complex.avr');
+ foreach (AvroDataIO::valid_codecs() as $codec) {
+ $data_file = $this->add_data_file('data-complex-%s.avr', $codec);
- $writers_schema = <<<JSON
+ $writers_schema = <<<JSON
{ "type": "record",
"name": "something",
"fields": [
@@ -229,42 +245,40 @@ JSON;
]}
JSON;
- $data = array(array("username" => "john",
- "something_fixed" => "foo",
- "something_enum" => "hello",
- "something_array" => array(1,2,3),
- "something_map" => array("a" => 1, "b" => 2),
- "something_record" => array("inner" => 2),
- "something_error" => array("code" => 403)),
- array("username" => "ryan",
- "something_fixed" => "bar",
- "something_enum" => "goodbye",
- "something_array" => array(1,2,3),
- "something_map" => array("a" => 2, "b" => 6),
- "something_record" => array("inner" => 1),
- "something_error" => array("code" => 401)));
- $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema);
- foreach ($data as $datum)
- $dw->append($datum);
- $dw->close();
-
- foreach (array('fixed', 'enum', 'record', 'error',
- 'array' , 'map', 'union') as $s)
- {
- $readers_schema = json_decode($writers_schema, true);
- $dr = AvroDataIO::open_file($data_file, 'r', json_encode($readers_schema));
- foreach ($dr->data() as $idx => $obj)
+ $data = array(array("username" => "john",
+ "something_fixed" => "foo",
+ "something_enum" => "hello",
+ "something_array" => array(1,2,3),
+ "something_map" => array("a" => 1, "b" => 2),
+ "something_record" => array("inner" => 2),
+ "something_error" => array("code" => 403)),
+ array("username" => "ryan",
+ "something_fixed" => "bar",
+ "something_enum" => "goodbye",
+ "something_array" => array(1,2,3),
+ "something_map" => array("a" => 2, "b" => 6),
+ "something_record" => array("inner" => 1),
+ "something_error" => array("code" => 401)));
+ $dw = AvroDataIO::open_file($data_file, 'w', $writers_schema, $codec);
+ foreach ($data as $datum)
+ $dw->append($datum);
+ $dw->close();
+
+ foreach (array('fixed', 'enum', 'record', 'error',
+ 'array' , 'map', 'union') as $s)
{
- foreach ($readers_schema['fields'] as $field)
+ $readers_schema = json_decode($writers_schema, true);
+ $dr = AvroDataIO::open_file($data_file, 'r', json_encode($readers_schema));
+ foreach ($dr->data() as $idx => $obj)
{
- $field_name = $field['name'];
- $this->assertEquals($data[$idx][$field_name], $obj[$field_name]);
+ foreach ($readers_schema['fields'] as $field)
+ {
+ $field_name = $field['name'];
+ $this->assertEquals($data[$idx][$field_name], $obj[$field_name]);
+ }
}
+ $dr->close();
}
- $dr->close();
-
}
-
}
-
}
diff --git a/lang/php/test/InterOpTest.php b/lang/php/test/InterOpTest.php
index 61d01b6..874f678 100644
--- a/lang/php/test/InterOpTest.php
+++ b/lang/php/test/InterOpTest.php
@@ -38,12 +38,8 @@ class InterOpTest extends PHPUnit_Framework_TestCase
if (!($dh = opendir($data_dir)))
die("Could not open data dir '$data_dir'\n");
- /* TODO This currently only tries to read files of the form 'language.avro',
- * but not 'language_deflate.avro' as the PHP implementation is not yet
- * able to read deflate data files. When deflate support is added, change
- * this to match *.avro. */
while ($file = readdir($dh))
- if (0 < preg_match('/^[a-z]+\.avro$/', $file))
+ if (0 < preg_match('/^[a-z]+(_deflate)?\.avro$/', $file))
$data_files []= join(DIRECTORY_SEPARATOR, array($data_dir, $file));
closedir($dh);
diff --git a/lang/php/test/generate_interop_data.php b/lang/php/test/generate_interop_data.php
index 5a9ea5b..a2fd1db 100644
--- a/lang/php/test/generate_interop_data.php
+++ b/lang/php/test/generate_interop_data.php
@@ -20,7 +20,6 @@
require_once('test_helper.php');
-$data_file = join(DIRECTORY_SEPARATOR, array(AVRO_BUILD_DATA_DIR, 'php.avro'));
$datum = array('nullField' => null,
'boolField' => true,
'intField' => -42,
@@ -41,6 +40,10 @@ $datum = array('nullField' => null,
'children' => array()))));
$schema_json = file_get_contents(AVRO_INTEROP_SCHEMA);
-$io_writer = AvroDataIO::open_file($data_file, 'w', $schema_json);
-$io_writer->append($datum);
-$io_writer->close();
+foreach (AvroDataIO::valid_codecs() as $codec) {
+ $file_name = $codec == AvroDataIO::NULL_CODEC ? 'php.avro' : sprintf('php_%s.avro', $codec);
+ $data_file = join(DIRECTORY_SEPARATOR, array(AVRO_BUILD_DATA_DIR, $file_name));
+ $io_writer = AvroDataIO::open_file($data_file, 'w', $schema_json, $codec);
+ $io_writer->append($datum);
+ $io_writer->close();
+}