You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by dk...@apache.org on 2020/05/26 12:16:22 UTC
[avro] branch master updated: AVRO-2842: PHP Add phpcs and fix all
violations against PSR12 (#891)
This is an automated email from the ASF dual-hosted git repository.
dkulp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new f30a7ca AVRO-2842: PHP Add phpcs and fix all violations against PSR12 (#891)
f30a7ca is described below
commit f30a7caee322f8679d90595f3f3d00549cdb6d29
Author: Siad Ardroumli <si...@gmail.com>
AuthorDate: Tue May 26 14:16:12 2020 +0200
AVRO-2842: PHP Add phpcs and fix all violations against PSR12 (#891)
* AVRO-2842: Add phpcs.
* AVRO-2842: Add PSR4 and code formatting.
* AVRO-2842: Fixed not found callbacks.
* AVRO-2842: Fixed PSR12 violations using phpcbf.
* AVRO-2842: Fixed PSR12 violations.
* Fixed left overs.
* Fixed example
* Added autoloader for non composer usage, updated readme and test bootstrap
* Added license
* Fixed psr issues
---
.gitignore | 1 +
composer.json | 3 +-
lang/php/README.md | 53 +
lang/php/README.txt | 45 -
lang/php/build.sh | 1 +
lang/php/examples/write_read.php | 14 +-
lang/php/lib/Avro.php | 174 +++
lang/php/lib/AvroDebug.php | 231 ++++
.../AvroException.php} | 21 +-
lang/php/lib/AvroGMP.php | 234 ++++
lang/php/lib/AvroIO.php | 130 ++
.../AvroNotImplementedException.php} | 21 +-
lang/php/lib/AvroUtil.php | 65 +
lang/php/lib/DataFile/AvroDataIO.php | 208 +++
.../DataFile/AvroDataIOException.php} | 23 +-
lang/php/lib/DataFile/AvroDataIOReader.php | 249 ++++
lang/php/lib/DataFile/AvroDataIOWriter.php | 237 ++++
lang/php/lib/Datum/AvroIOBinaryDecoder.php | 266 ++++
lang/php/lib/Datum/AvroIOBinaryEncoder.php | 179 +++
lang/php/lib/Datum/AvroIODatumReader.php | 499 +++++++
lang/php/lib/Datum/AvroIODatumWriter.php | 189 +++
.../Datum/AvroIOSchemaMatchException.php} | 40 +-
.../Datum/AvroIOTypeException.php} | 37 +-
lang/php/lib/IO/AvroFile.php | 197 +++
.../IO/AvroIOException.php} | 23 +-
lang/php/lib/IO/AvroStringIO.php | 252 ++++
lang/php/lib/Protocol/AvroProtocol.php | 66 +
lang/php/lib/Protocol/AvroProtocolMessage.php | 59 +
.../Protocol/AvroProtocolParseException.php} | 19 +-
lang/php/lib/Schema/AvroArraySchema.php | 88 ++
lang/php/lib/Schema/AvroEnumSchema.php | 119 ++
lang/php/lib/Schema/AvroField.php | 189 +++
lang/php/lib/Schema/AvroFixedSchema.php | 68 +
lang/php/lib/Schema/AvroMapSchema.php | 90 ++
lang/php/lib/Schema/AvroName.php | 168 +++
lang/php/lib/Schema/AvroNamedSchema.php | 92 ++
lang/php/lib/Schema/AvroNamedSchemata.php | 103 ++
lang/php/lib/Schema/AvroPrimitiveSchema.php | 54 +
lang/php/lib/Schema/AvroRecordSchema.php | 173 +++
lang/php/lib/Schema/AvroSchema.php | 556 ++++++++
.../Schema/AvroSchemaParseException.php} | 23 +-
lang/php/lib/Schema/AvroUnionSchema.php | 123 ++
lang/php/lib/autoload.php | 61 +
lang/php/lib/avro.php | 201 ---
lang/php/lib/avro/data_file.php | 601 --------
lang/php/lib/avro/datum.php | 984 -------------
lang/php/lib/avro/debug.php | 194 ---
lang/php/lib/avro/gmp.php | 222 ---
lang/php/lib/avro/io.php | 494 -------
lang/php/lib/avro/protocol.php | 87 --
lang/php/lib/avro/schema.php | 1457 --------------------
lang/php/lib/avro/util.php | 67 -
lang/php/monorepo.json | 14 +-
lang/php/test/DataFileTest.php | 481 ++++---
lang/php/test/DatumIOTest.php | 284 ++--
lang/php/test/FloatIntEncodingTest.php | 47 +-
lang/php/test/IODatumReaderTest.php | 25 +-
lang/php/test/InterOpTest.php | 88 +-
lang/php/test/LongEncodingTest.php | 841 +++++++----
lang/php/test/NameTest.php | 154 ++-
lang/php/test/ProtocolFileTest.php | 81 +-
lang/php/test/SchemaTest.php | 389 +++---
lang/php/test/StringIOTest.php | 120 +-
lang/php/test/generate_interop_data.php | 61 +-
lang/php/test/test_helper.php | 8 +-
65 files changed, 6816 insertions(+), 5527 deletions(-)
diff --git a/.gitignore b/.gitignore
index 93379c0..333dfc0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,4 @@ test-output
.coverage
vendor
composer.lock
+.phpunit.result.cache
diff --git a/composer.json b/composer.json
index f214cda..cf4b412 100644
--- a/composer.json
+++ b/composer.json
@@ -7,6 +7,7 @@
"beberlei/composer-monorepo-plugin": "^0.12"
},
"require-dev": {
- "phpunit/phpunit": "^9.1"
+ "phpunit/phpunit": "^9.1",
+ "squizlabs/php_codesniffer": "^3.5"
}
}
diff --git a/lang/php/README.md b/lang/php/README.md
new file mode 100644
index 0000000..98cfd29
--- /dev/null
+++ b/lang/php/README.md
@@ -0,0 +1,53 @@
+|[![AVRO](https://raw.githubusercontent.com/apache/avro/master/doc/src/resources/images/avro-logo.png)](https://github.com/apache/avro) | [![AVRO](https://raw.githubusercontent.com/apache/avro/master/doc/src/resources/images/apache_feather.gif)](https://github.com/apache/avro)|
+|:-----|-----:|
+
+What the Avro PHP library is
+============================
+
+A library for using [Avro](https://avro.apache.org/) with PHP.
+
+Requirements
+============
+ * PHP 7.3+
+ * On 32-bit platforms, the [GMP PHP extension](https://php.net/gmp)
+ * For Zstandard compression, [ext-zstd](https://github.com/kjdev/php-ext-zstd)
+ * For Snappy compression, [ext-snappy](https://github.com/kjdev/php-ext-snappy)
+ * For testing, [PHPUnit](https://www.phpunit.de/)
+
+Both GMP and PHPUnit are often available via package management
+systems as `php7-gmp` and `phpunit`, respectively.
+
+
+Getting started
+===============
+
+## 1. Composer
+
+The preferred method to install Avro. Add `apache/avro` to the require section of
+your project's `composer.json` configuration file, and run `composer install`:
+```json
+{
+ "require-dev": {
+ "apache/avro": "dev-master"
+ }
+}
+```
+
+## 2. Manual Installation
+
+Untar the avro-php distribution and put it in your include path:
+
+ tar xjf avro-php.tar.bz2 # avro-php.tar.bz2 is likely avro-php-1.4.0.tar.bz2
+ cp avro-php /path/to/where/you/want/it
+
+Require the `autoload.php` file in your source, and you should be good to go:
+
+ <?php
+ require_once('avro-php/autoload.php');
+
+If you're pulling from source, put `lib/` in your include path and require `lib/autoload.php`:
+
+ <?php
+ require_once('lib/autoload.php');
+
+Take a look in `examples/` for usage.
diff --git a/lang/php/README.txt b/lang/php/README.txt
deleted file mode 100644
index d3d7557..0000000
--- a/lang/php/README.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-What the Avro PHP library is
-============================
-
-A library for using [Avro](https://avro.apache.org/) with PHP.
-
-Requirements
-============
- * PHP 7
-
- * NOTE: Currently, the Avro code is supposed to work with PHP 5.6
- as well, but that support may be dropped in a future version.
-
- * On 32-bit platforms, the [GMP PHP extension](https://php.net/gmp)
-
- * For testing, [PHPUnit](https://www.phpunit.de/)
-
- * NOTE: If you use Avro with PHP 5.6, use PHPUnit 5.7, which is the only
- version that works with the 5.6 runtime and the 7.x style test code.
- Otherwise, use PHPUnit 7.x.
-
-Both GMP and PHPUnit are often available via package management
-systems as `php7-gmp` and `phpunit`, respectively.
-But if you use a specific version of PHPUnit as described above,
-download and install it manually without the package manager.
-
-
-Getting started
-===============
-
-Untar the avro-php distribution, untar it, and put it in your include path:
-
- tar xjf avro-php.tar.bz2 # avro-php.tar.bz2 is likely avro-php-1.4.0.tar.bz2
- cp avro-php /path/to/where/you/want/it
-
-Require the avro.php file in your source, and you should be good to go:
-
- <?php
- require_once('avro-php/avro.php');
-
-If you're pulling from source, put `lib/` in your include path and require `lib/avro.php`:
-
- <?php
- require_once('lib/avro.php');
-
-Take a look in `examples/` for usage.
diff --git a/lang/php/build.sh b/lang/php/build.sh
index 6ce536f..3acd0ac 100755
--- a/lang/php/build.sh
+++ b/lang/php/build.sh
@@ -58,6 +58,7 @@ do
lint)
find . -name "*.php" -print0 | xargs -0 -n1 -P8 php -l
+ vendor/bin/phpcs --standard=PSR12 lib
;;
test)
diff --git a/lang/php/examples/write_read.php b/lang/php/examples/write_read.php
index d420da0..b5c1a69 100644
--- a/lang/php/examples/write_read.php
+++ b/lang/php/examples/write_read.php
@@ -18,7 +18,15 @@
* limitations under the License.
*/
-require_once('../lib/avro.php');
+require_once __DIR__ . '/../vendor/autoload.php';
+
+use Apache\Avro\DataFile\AvroDataIO;
+use Apache\Avro\DataFile\AvroDataIOReader;
+use Apache\Avro\DataFile\AvroDataIOWriter;
+use Apache\Avro\Datum\AvroIODatumReader;
+use Apache\Avro\Datum\AvroIODatumWriter;
+use Apache\Avro\IO\AvroStringIO;
+use Apache\Avro\Schema\AvroSchema;
// Write and read a data file
@@ -35,7 +43,7 @@ $data = array($jose, $maria);
$file_name = 'data.avr';
// Open $file_name for writing, using the given writer's schema
-$data_writer = AvroDataIO::open_file($file_name, 'w', $writers_schema_json);
+$data_writer = AvroDataIO::openFile($file_name, 'w', $writers_schema_json);
// Write each datum to the file
foreach ($data as $datum)
@@ -45,7 +53,7 @@ $data_writer->close();
// Open $file_name (by default for reading) using the writer's schema
// included in the file
-$data_reader = AvroDataIO::open_file($file_name);
+$data_reader = AvroDataIO::openFile($file_name);
echo "from file:\n";
// Read each datum
foreach ($data_reader->data() as $datum)
diff --git a/lang/php/lib/Avro.php b/lang/php/lib/Avro.php
new file mode 100644
index 0000000..fc43b49
--- /dev/null
+++ b/lang/php/lib/Avro.php
@@ -0,0 +1,174 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro;
+
+/**
+ * Library-level class for PHP Avro port.
+ *
+ * Contains library details such as version number and platform checks.
+ *
+ * This port is an implementation of the
+ * {@link https://avro.apache.org/docs/1.3.3/spec.html Avro 1.3.3 Specification}
+ *
+ * @package Avro
+ */
+class Avro
+{
+ /**
+ * @var string version number of Avro specification to which
+ * this implementation complies
+ */
+ const SPEC_VERSION = '1.3.3';
+
+ /**#@+
+ * Constant to enumerate endianness.
+ * @access private
+ * @var int
+ */
+ const BIG_ENDIAN = 0x00;
+ const LITTLE_ENDIAN = 0x01;
+ /**#@-*/
+ /**#@+
+ * Constant to enumerate biginteger handling mode.
+ * GMP is used, if available, on 32-bit platforms.
+ */
+ const PHP_BIGINTEGER_MODE = 0x00;
+ const GMP_BIGINTEGER_MODE = 0x01;
+ /**
+ * Memoized result of self::setEndianness()
+ * @var int self::BIG_ENDIAN or self::LITTLE_ENDIAN
+ * @see self::setEndianness()
+ */
+ private static $endianness;
+ /**#@-*/
+ /**
+ * @var int
+ * Mode used to handle bigintegers. After Avro::check64Bit() has been called,
+ * (usually via a call to Avro::checkPlatform()), set to
+ * self::GMP_BIGINTEGER_MODE on 32-bit platforms that have GMP available,
+ * and to self::PHP_BIGINTEGER_MODE otherwise.
+ */
+ private static $biginteger_mode;
+
+ /**
+ * Wrapper method to call each required check.
+ *
+ */
+ public static function checkPlatform()
+ {
+ self::check64Bit();
+ self::checkLittleEndian();
+ }
+
+ /**
+ * Determines if the host platform can encode and decode long integer data.
+ *
+ * @throws AvroException if the platform cannot handle long integers.
+ */
+ private static function check64Bit()
+ {
+ if (8 != PHP_INT_SIZE) {
+ if (extension_loaded('gmp')) {
+ self::$biginteger_mode = self::GMP_BIGINTEGER_MODE;
+ } else {
+ throw new AvroException('This platform cannot handle a 64-bit operations. '
+ . 'Please install the GMP PHP extension.');
+ }
+ } else {
+ self::$biginteger_mode = self::PHP_BIGINTEGER_MODE;
+ }
+ }
+
+ /**
+ * Determines if the host platform is little endian,
+ * required for processing double and float data.
+ *
+ * @throws AvroException if the platform is not little endian.
+ */
+ private static function checkLittleEndian()
+ {
+ if (!self::isLittleEndianPlatform()) {
+ throw new AvroException('This is not a little-endian platform');
+ }
+ }
+
+ /**
+ * @returns boolean true if the host platform is little endian,
+ * and false otherwise.
+ * @uses self::isBigEndianPlatform()
+ */
+ private static function isLittleEndianPlatform()
+ {
+ return !self::isBigEndianPlatform();
+ }
+
+ /**
+ * @returns boolean true if the host platform is big endian
+ * and false otherwise.
+ * @uses self::setEndianness()
+ */
+ private static function isBigEndianPlatform()
+ {
+ if (is_null(self::$endianness)) {
+ self::setEndianness();
+ }
+
+ return (self::BIG_ENDIAN == self::$endianness);
+ }
+
+ /**
+ * Determines the endianness of the host platform and memoizes
+ * the result to Avro::$endianness.
+ *
+ * Based on a similar check performed in https://pear.php.net/package/Math_BinaryUtils
+ *
+ * @throws AvroException if the endianness cannot be determined.
+ */
+ private static function setEndianness()
+ {
+ $packed = pack('d', 1);
+ switch ($packed) {
+ case "\77\360\0\0\0\0\0\0":
+ self::$endianness = self::BIG_ENDIAN;
+ break;
+ case "\0\0\0\0\0\0\360\77":
+ self::$endianness = self::LITTLE_ENDIAN;
+ break;
+ default:
+ throw new AvroException(
+ sprintf(
+ 'Error determining platform endianness: %s',
+ AvroDebug::hexString($packed)
+ )
+ );
+ }
+ }
+
+ /**
+ * @returns boolean true if the PHP GMP extension is used and false otherwise.
+ * @internal Requires Avro::check64Bit() (exposed via Avro::checkPlatform())
+ * to have been called to set Avro::$biginteger_mode.
+ */
+ public static function usesGmp()
+ {
+ return self::GMP_BIGINTEGER_MODE === self::$biginteger_mode;
+ }
+}
diff --git a/lang/php/lib/AvroDebug.php b/lang/php/lib/AvroDebug.php
new file mode 100644
index 0000000..508c0c6
--- /dev/null
+++ b/lang/php/lib/AvroDebug.php
@@ -0,0 +1,231 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro;
+
+/**
+ * Avro library code debugging functions
+ * @package Avro
+ */
+class AvroDebug
+{
+ /**
+ * @var int high debug level
+ */
+ const DEBUG5 = 5;
+ /**
+ * @var int low debug level
+ */
+ const DEBUG1 = 1;
+ /**
+ * @var int current debug level
+ */
+ const DEBUG_LEVEL = self::DEBUG1;
+
+ /**
+ * @param string $format format string for the given arguments. Passed as is
+ * to <code>vprintf</code>.
+ * @param array $args array of arguments to pass to vprintf.
+ * @param int $debug_level debug level at which to print this statement
+ * @returns boolean true
+ */
+ public static function debug($format, $args, $debug_level = self::DEBUG1)
+ {
+ if (self::isDebug($debug_level)) {
+ vprintf($format . "\n", $args);
+ }
+ return true;
+ }
+
+ /**
+ * @param int $debug_level
+ * @returns boolean true if the given $debug_level is equivalent
+ * or more verbose than the current debug level
+ * and false otherwise.
+ */
+ public static function isDebug($debug_level = self::DEBUG1)
+ {
+ return (self::DEBUG_LEVEL >= $debug_level);
+ }
+
+ /**
+ * @param string $str
+ * @param string $joiner string used to join
+ * @returns string hex-represented bytes of each byte of $str
+ * joined by $joiner
+ */
+ public static function hexString($str, $joiner = ' ')
+ {
+ return implode($joiner, self::hexArray($str));
+ }
+
+ /**
+ * @param string $str
+ * @returns string[] array of hex representation of each byte of $str
+ */
+ public static function hexArray($str)
+ {
+ return self::bytesArray($str);
+ }
+
+ /**
+ * @param string $str
+ * @param string $format format to represent bytes
+ * @returns string[] array of each byte of $str formatted using $format
+ */
+ public static function bytesArray($str, $format = 'x%02x')
+ {
+ $x = array();
+ foreach (str_split($str) as $b) {
+ $x[] = sprintf($format, ord($b));
+ }
+ return $x;
+ }
+
+ /**
+ * @param string $str
+ * @param string $joiner string to join bytes of $str
+ * @returns string of bytes of $str represented in decimal format
+ * @uses decArray()
+ */
+ public static function decString($str, $joiner = ' ')
+ {
+ return implode($joiner, self::decArray($str));
+ }
+
+ /**
+ * @param string $str
+ * @returns string[] array of bytes of $str represented in decimal format ('%3d')
+ */
+ public static function decArray($str)
+ {
+ return self::bytesArray($str, '%3d');
+ }
+
+ /**
+ * @param string $str
+ * @param string $format one of 'ctrl', 'hex', or 'dec'.
+ * See {@link self::asciiArray()} for more description
+ * @param string $joiner
+ * @returns string of bytes joined by $joiner
+ * @uses asciiArray()
+ */
+ public static function asciiString($str, $format = 'ctrl', $joiner = ' ')
+ {
+ return implode($joiner, self::asciiArray($str, $format));
+ }
+
+ /**
+ * @param string $str
+ * @param string $format one of 'ctrl', 'hex', or 'dec' for control,
+ * hexadecimal, or decimal format for bytes.
+ * - ctrl: ASCII control characters represented as text.
+ * For example, the null byte is represented as 'NUL'.
+ * Visible ASCII characters represent themselves, and
+ * others are represented as a decimal ('%03d')
+ * - hex: bytes represented in hexadecimal ('%02X')
+ * - dec: bytes represented in decimal ('%03d')
+ * @returns string[] array of bytes represented in the given format.
+ * @throws AvroException
+ */
+ public static function asciiArray($str, $format = 'ctrl')
+ {
+ if (!in_array($format, ['ctrl', 'hex', 'dec'])) {
+ throw new AvroException('Unrecognized format specifier');
+ }
+
+ $ctrl_chars = array(
+ 'NUL',
+ 'SOH',
+ 'STX',
+ 'ETX',
+ 'EOT',
+ 'ENQ',
+ 'ACK',
+ 'BEL',
+ 'BS',
+ 'HT',
+ 'LF',
+ 'VT',
+ 'FF',
+ 'CR',
+ 'SO',
+ 'SI',
+ 'DLE',
+ 'DC1',
+ 'DC2',
+ 'DC3',
+ 'DC4',
+ 'NAK',
+ 'SYN',
+ 'ETB',
+ 'CAN',
+ 'EM',
+ 'SUB',
+ 'ESC',
+ 'FS',
+ 'GS',
+ 'RS',
+ 'US'
+ );
+ $x = array();
+ foreach (str_split($str) as $b) {
+ $db = ord($b);
+ if ($db < 32) {
+ switch ($format) {
+ case 'ctrl':
+ $x[] = str_pad($ctrl_chars[$db], 3, ' ', STR_PAD_LEFT);
+ break;
+ case 'hex':
+ $x[] = sprintf("x%02X", $db);
+ break;
+ case 'dec':
+ $x[] = str_pad($db, 3, '0', STR_PAD_LEFT);
+ break;
+ }
+ } else {
+ if ($db < 127) {
+ $x[] = " $b";
+ } else {
+ if ($db == 127) {
+ switch ($format) {
+ case 'ctrl':
+ $x[] = 'DEL';
+ break;
+ case 'hex':
+ $x[] = sprintf("x%02X", $db);
+ break;
+ case 'dec':
+ $x[] = str_pad($db, 3, '0', STR_PAD_LEFT);
+ break;
+ }
+ } else {
+ if ('hex' === $format) {
+ $x[] = sprintf("x%02X", $db);
+ } else {
+ $x[] = str_pad($db, 3, '0', STR_PAD_LEFT);
+ }
+ }
+ }
+ }
+ }
+ return $x;
+ }
+}
diff --git a/lang/php/test/IODatumReaderTest.php b/lang/php/lib/AvroException.php
similarity index 64%
copy from lang/php/test/IODatumReaderTest.php
copy to lang/php/lib/AvroException.php
index 7e1e7d9..32a96c8 100644
--- a/lang/php/test/IODatumReaderTest.php
+++ b/lang/php/lib/AvroException.php
@@ -1,4 +1,5 @@
<?php
+
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -17,20 +18,12 @@
* limitations under the License.
*/
-require_once('test_helper.php');
+namespace Apache\Avro;
-class IODatumReaderTest extends PHPUnit\Framework\TestCase
+/**
+ * General Avro exceptions.
+ * @package Avro
+ */
+class AvroException extends \Exception
{
-
- public function testSchemaMatching()
- {
- $writers_schema = <<<JSON
- { "type": "map",
- "values": "bytes" }
-JSON;
- $readers_schema = $writers_schema;
- $this->assertTrue(AvroIODatumReader::schemas_match(
- AvroSchema::parse($writers_schema),
- AvroSchema::parse($readers_schema)));
- }
}
diff --git a/lang/php/lib/AvroGMP.php b/lang/php/lib/AvroGMP.php
new file mode 100644
index 0000000..c868cf1
--- /dev/null
+++ b/lang/php/lib/AvroGMP.php
@@ -0,0 +1,234 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro;
+
+/**
+ * Methods for handling 64-bit operations using the GMP extension.
+ *
+ * This is a naive and hackish implementation that is intended
+ * to work well enough to support Avro. It has not been tested
+ * beyond what's needed to decode and encode long values.
+ *
+ * @package Avro
+ */
+class AvroGMP
+{
+ /**
+ * @var resource memoized GMP resource for zero
+ */
+ private static $gmp_0;
+ /**
+ * @var resource memoized GMP resource for one (1)
+ */
+ private static $gmp_1;
+ /**
+ * @var resource memoized GMP resource for two (2)
+ */
+ private static $gmp_2;
+ /**
+ * @var resource memoized GMP resource for 0x7f
+ */
+ private static $gmp_0x7f;
+ /**
+ * @var resource memoized GMP resource for 64-bit ~0x7f
+ */
+ private static $gmp_n0x7f;
+ /**
+ * @var resource memoized GMP resource for 64-bits of 1
+ */
+ private static $gmp_0xfs;
+
+ /**
+ * @param int|string $n integer (or string representation of integer) to encode
+ * @return string $bytes of the long $n encoded per the Avro spec
+ */
+ public static function encodeLong($n)
+ {
+ $g = gmp_init($n);
+ $g = gmp_xor(
+ self::shiftLeft($g, 1),
+ self::shiftRight($g, 63)
+ );
+ $bytes = '';
+ while (0 != gmp_cmp(self::gmp_0(), gmp_and($g, self::gmp_n0x7f()))) {
+ $bytes .= chr(gmp_intval(gmp_and($g, self::gmp_0x7f())) | 0x80);
+ $g = self::shiftRight($g, 7);
+ }
+ $bytes .= chr(gmp_intval($g));
+ return $bytes;
+ }
+
+ /**
+ * @internal Only works up to shift 63 (doesn't wrap bits around).
+ * @param resource|int|string $g
+ * @param int $shift number of bits to shift left
+ * @returns resource $g shifted left
+ */
+ public static function shiftLeft($g, $shift)
+ {
+ if (0 == $shift) {
+ return $g;
+ }
+
+ if (0 > gmp_sign($g)) {
+ $g = self::gmpTwosComplement($g);
+ }
+
+ $m = gmp_mul($g, gmp_pow(self::gmp_2(), $shift));
+ $m = gmp_and($m, self::gmp_0xfs());
+ if (gmp_testbit($m, 63)) {
+ $m = gmp_neg(gmp_add(
+ gmp_and(gmp_com($m), self::gmp_0xfs()),
+ self::gmp_1()
+ ));
+ }
+ return $m;
+ }
+
+ /**
+ * @param GMP resource
+ * @returns GMP resource 64-bit two's complement of input.
+ */
+ public static function gmpTwosComplement($g)
+ {
+ return gmp_neg(gmp_sub(gmp_pow(self::gmp_2(), 64), $g));
+ }
+
+ /**
+ * Arithmetic right shift
+ * @param resource|int|string $g
+ * @param int $shift number of bits to shift right
+ * @returns resource $g shifted right $shift bits
+ */
+ public static function shiftRight($g, $shift)
+ {
+ if (0 == $shift) {
+ return $g;
+ }
+
+ if (0 <= gmp_sign($g)) {
+ $m = gmp_div($g, gmp_pow(self::gmp_2(), $shift));
+ } else // negative
+ {
+ $g = gmp_and($g, self::gmp_0xfs());
+ $m = gmp_div($g, gmp_pow(self::gmp_2(), $shift));
+ $m = gmp_and($m, self::gmp_0xfs());
+ for ($i = 63; $i >= (63 - $shift); $i--) {
+ gmp_setbit($m, $i);
+ }
+
+ $m = gmp_neg(gmp_add(
+ gmp_and(gmp_com($m), self::gmp_0xfs()),
+ self::gmp_1()
+ ));
+ }
+
+ return $m;
+ }
+
+ // phpcs:disable PSR1.Methods.CamelCapsMethodName
+
+ /**
+ * @returns resource GMP resource for two (2)
+ */
+ private static function gmp_2()
+ {
+ if (!isset(self::$gmp_2)) {
+ self::$gmp_2 = gmp_init('2');
+ }
+ return self::$gmp_2;
+ }
+
+ /**
+ * @returns resource GMP resource for 64-bits of 1
+ */
+ private static function gmp_0xfs()
+ {
+ if (!isset(self::$gmp_0xfs)) {
+ self::$gmp_0xfs = gmp_init('0xffffffffffffffff');
+ }
+ return self::$gmp_0xfs;
+ }
+
+ /**
+ * @returns resource GMP resource for one (1)
+ */
+ private static function gmp_1()
+ {
+ if (!isset(self::$gmp_1)) {
+ self::$gmp_1 = gmp_init('1');
+ }
+ return self::$gmp_1;
+ }
+
+ /**
+ * @returns resource GMP resource for zero
+ */
+ private static function gmp_0()
+ {
+ if (!isset(self::$gmp_0)) {
+ self::$gmp_0 = gmp_init('0');
+ }
+ return self::$gmp_0;
+ }
+
+ /**
+ * @returns resource GMP resource for 64-bit ~0x7f
+ */
+ private static function gmp_n0x7f()
+ {
+ if (!isset(self::$gmp_n0x7f)) {
+ self::$gmp_n0x7f = gmp_init('0xffffffffffffff80');
+ }
+ return self::$gmp_n0x7f;
+ }
+
+ /**
+ * @returns resource GMP resource for 0x7f
+ */
+ private static function gmp_0x7f()
+ {
+ if (!isset(self::$gmp_0x7f)) {
+ self::$gmp_0x7f = gmp_init('0x7f');
+ }
+ return self::$gmp_0x7f;
+ }
+
+ // phpcs:enable
+
+ /**
+ * @param int[] $bytes array of ascii codes of bytes to decode
+ * @return string representation of decoded long.
+ */
+ public static function decodeLongFromArray($bytes)
+ {
+ $b = array_shift($bytes);
+ $g = gmp_init($b & 0x7f);
+ $shift = 7;
+ while (0 != ($b & 0x80)) {
+ $b = array_shift($bytes);
+ $g = gmp_or($g, self::shiftLeft(($b & 0x7f), $shift));
+ $shift += 7;
+ }
+ $val = gmp_xor(self::shiftRight($g, 1), gmp_neg(gmp_and($g, 1)));
+ return gmp_strval($val);
+ }
+}
diff --git a/lang/php/lib/AvroIO.php b/lang/php/lib/AvroIO.php
new file mode 100644
index 0000000..3474318
--- /dev/null
+++ b/lang/php/lib/AvroIO.php
@@ -0,0 +1,130 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro;
+
+/**
+ * Barebones IO base class to provide common interface for file and string
+ * access within the Avro classes.
+ *
+ * @package Avro
+ */
+class AvroIO
+{
+ /**
+ * @var string general read mode
+ */
+ const READ_MODE = 'r';
+ /**
+ * @var string general write mode.
+ */
+ const WRITE_MODE = 'w';
+
+ /**
+ * @var int set position to current index + $offset bytes
+ */
+ const SEEK_CUR = SEEK_CUR;
+ /**
+ * @var int set position equal to $offset bytes
+ */
+ const SEEK_SET = SEEK_SET;
+ /**
+ * @var int set position to end of file + $offset bytes
+ */
+ const SEEK_END = SEEK_END;
+
+ /**
+ * Read $len bytes from AvroIO instance
+ * @return string bytes read
+ * @param int $len
+ */
+ public function read($len)
+ {
+ throw new AvroNotImplementedException('Not implemented');
+ }
+
+ /**
+ * Append bytes to this buffer. (Nothing more is needed to support Avro.)
+ * @param string $arg bytes to write
+ * @returns int count of bytes written.
+ * @throws IO\AvroIOException if $arg is not a string value.
+ */
+ public function write($arg)
+ {
+ throw new AvroNotImplementedException('Not implemented');
+ }
+
+ /**
+ * Return byte offset within AvroIO instance
+ * @return int
+ */
+ public function tell()
+ {
+ throw new AvroNotImplementedException('Not implemented');
+ }
+
+ /**
+ * Set the position indicator. The new position, measured in bytes
+ * from the beginning of the file, is obtained by adding $offset to
+ * the position specified by $whence.
+ *
+ * @param int $offset
+ * @param int $whence one of AvroIO::SEEK_SET, AvroIO::SEEK_CUR,
+ * or AvroIO::SEEK_END
+ * @returns boolean true
+ *
+ * @throws IO\AvroIOException
+ */
+ public function seek($offset, $whence = self::SEEK_SET): bool
+ {
+ throw new AvroNotImplementedException('Not implemented');
+ }
+
+ /**
+ * Flushes any buffered data to the AvroIO object.
+ * @returns boolean true upon success.
+ */
+ public function flush()
+ {
+ throw new AvroNotImplementedException('Not implemented');
+ }
+
+ /**
+ * Returns whether or not the current position is at the end of this AvroIO
+ * instance.
+ *
+ * Note isEof() is <b>not</b> like eof in C or feof in PHP:
+ * it returns TRUE if the *next* read would be end of file,
+ * rather than if the *most recent* read read end of file.
+ * @returns boolean true if at the end of file, and false otherwise
+ */
+ public function isEof()
+ {
+ throw new AvroNotImplementedException('Not implemented');
+ }
+
+ /**
+ * Closes this AvroIO instance.
+ */
+ public function close()
+ {
+ throw new AvroNotImplementedException('Not implemented');
+ }
+}
diff --git a/lang/php/test/IODatumReaderTest.php b/lang/php/lib/AvroNotImplementedException.php
similarity index 64%
copy from lang/php/test/IODatumReaderTest.php
copy to lang/php/lib/AvroNotImplementedException.php
index 7e1e7d9..a56f17f 100644
--- a/lang/php/test/IODatumReaderTest.php
+++ b/lang/php/lib/AvroNotImplementedException.php
@@ -1,4 +1,5 @@
<?php
+
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -17,20 +18,12 @@
* limitations under the License.
*/
-require_once('test_helper.php');
+namespace Apache\Avro;
-class IODatumReaderTest extends PHPUnit\Framework\TestCase
+/**
+ * Avro "not implemented method" exception.
+ * @package Avro
+ */
+class AvroNotImplementedException extends AvroException
{
-
- public function testSchemaMatching()
- {
- $writers_schema = <<<JSON
- { "type": "map",
- "values": "bytes" }
-JSON;
- $readers_schema = $writers_schema;
- $this->assertTrue(AvroIODatumReader::schemas_match(
- AvroSchema::parse($writers_schema),
- AvroSchema::parse($readers_schema)));
- }
}
diff --git a/lang/php/lib/AvroUtil.php b/lang/php/lib/AvroUtil.php
new file mode 100644
index 0000000..99a2c68
--- /dev/null
+++ b/lang/php/lib/AvroUtil.php
@@ -0,0 +1,65 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro;
+
+/**
+ * Static helper methods shared across the Avro library.
+ *
+ * @package Avro
+ */
+class AvroUtil
+{
+    /**
+     * Tells a list apart from an associative array.
+     *
+     * A list is an array whose keys are exactly the integers 0, 1, 2, ...
+     * in that order; an empty array counts as a list. Anything else
+     * (a map/hash/dictionary, or a value that is not an array at all)
+     * is not a list.
+     *
+     * @param array $ary array to test
+     * @returns boolean true if $ary is a list and false otherwise.
+     */
+    public static function isList($ary)
+    {
+        if (!is_array($ary)) {
+            return false;
+        }
+
+        // Walk the keys in iteration order; each one must equal its position.
+        $expected = 0;
+        foreach (array_keys($ary) as $key) {
+            if ($key !== $expected) {
+                return false;
+            }
+            ++$expected;
+        }
+
+        return true;
+    }
+
+    /**
+     * Null-safe array lookup.
+     *
+     * @param array $ary
+     * @param string $key
+     * @returns mixed the value of $ary[$key] if it is set,
+     *                and null otherwise.
+     */
+    public static function arrayValue($ary, $key)
+    {
+        $value = $ary[$key] ?? null;
+
+        return $value;
+    }
+}
diff --git a/lang/php/lib/DataFile/AvroDataIO.php b/lang/php/lib/DataFile/AvroDataIO.php
new file mode 100644
index 0000000..bc78a68
--- /dev/null
+++ b/lang/php/lib/DataFile/AvroDataIO.php
@@ -0,0 +1,208 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\DataFile;
+
+use Apache\Avro\AvroIO;
+use Apache\Avro\Datum\AvroIODatumReader;
+use Apache\Avro\Datum\AvroIODatumWriter;
+use Apache\Avro\IO\AvroFile;
+use Apache\Avro\Schema\AvroSchema;
+
+/**
+ * @package Avro
+ */
+class AvroDataIO
+{
+ /**
+ * @var int used in file header
+ */
+ const VERSION = 1;
+
+ /**
+ * @var int count of bytes in synchronization marker
+ */
+ const SYNC_SIZE = 16;
+
+ /**
+ * @var int buffer length at which AvroDataIOWriter::append() writes out a block, arbitrarily set to 4000 * SYNC_SIZE
+ * @todo make this value configurable
+ */
+ const SYNC_INTERVAL = 64000;
+
+ /**
+ * @var string map key for datafile metadata codec value
+ */
+ const METADATA_CODEC_ATTR = 'avro.codec';
+
+ /**
+ * @var string map key for datafile metadata schema value
+ */
+ const METADATA_SCHEMA_ATTR = 'avro.schema';
+ /**
+ * @var string JSON for datafile metadata schema
+ */
+ const METADATA_SCHEMA_JSON = '{"type":"map","values":"bytes"}';
+
+ /**
+ * @var string codec value for NULL codec
+ */
+ const NULL_CODEC = 'null';
+
+ /**
+ * @var string codec value for deflate codec
+ */
+ const DEFLATE_CODEC = 'deflate';
+
+ const SNAPPY_CODEC = 'snappy';
+
+ const ZSTANDARD_CODEC = 'zstandard';
+
+ const BZIP2_CODEC = 'bzip2';
+
+ /**
+ * @var array array of valid codec names
+ */
+ private static $validCodecs = [
+ self::NULL_CODEC,
+ self::DEFLATE_CODEC,
+ self::SNAPPY_CODEC,
+ self::ZSTANDARD_CODEC,
+ self::BZIP2_CODEC
+ ];
+
+ /**
+ * @var AvroSchema cached version of metadata schema object
+ */
+ private static $metadataSchema;
+
+ /**
+ * @returns int count of bytes in the initial "magic" segment of the
+ * Avro container file header, i.e. the byte length of magic()
+ */
+ public static function magicSize()
+ {
+ return strlen(self::magic());
+ }
+
+ /**
+ * @returns string the initial "magic" segment of an Avro container file
+ * header: the literal 'Obj' followed by VERSION packed as a single byte.
+ */
+ public static function magic()
+ {
+ return ('Obj' . pack('c', self::VERSION));
+ }
+
+ /**
+ * Parses METADATA_SCHEMA_JSON on first use and caches the result in
+ * self::$metadataSchema, so later calls return the cached object.
+ *
+ * @returns AvroSchema object of Avro container file metadata.
+ */
+ public static function metadataSchema()
+ {
+ if (is_null(self::$metadataSchema)) {
+ self::$metadataSchema = AvroSchema::parse(self::METADATA_SCHEMA_JSON);
+ }
+ return self::$metadataSchema;
+ }
+
+    /**
+     * Opens the Avro data file at $file_path, either for reading or for
+     * writing, and returns the matching data IO object.
+     *
+     * @param string $file_path path of the file to open
+     * @param string $mode one of AvroFile::READ_MODE or AvroFile::WRITE_MODE
+     * @param string $schemaJson JSON of the schema. Required in write mode
+     *                           (it becomes the writer's schema); in read
+     *                           mode it is optional and is passed on to
+     *                           openReader().
+     * @param string $codec compression codec, only used in write mode
+     * @returns AvroDataIOReader|AvroDataIOWriter a reader in read mode,
+     *          a writer in write mode
+     *
+     * @throws AvroDataIOException if $schemaJson is not provided in write
+     *                             mode or if an invalid $mode is given.
+     */
+    public static function openFile(
+        $file_path,
+        $mode = AvroFile::READ_MODE,
+        $schemaJson = null,
+        $codec = self::NULL_CODEC
+    ) {
+        $schema = null;
+        if (!is_null($schemaJson)) {
+            $schema = AvroSchema::parse($schemaJson);
+        }
+
+        switch ($mode) {
+            case AvroFile::WRITE_MODE:
+                if (is_null($schema)) {
+                    throw new AvroDataIOException('Writing an Avro file requires a schema.');
+                }
+                $target = new AvroFile($file_path, AvroFile::WRITE_MODE);
+                return self::openWriter($target, $schema, $codec);
+            case AvroFile::READ_MODE:
+                $source = new AvroFile($file_path, AvroFile::READ_MODE);
+                return self::openReader($source, $schema);
+            default:
+                throw new AvroDataIOException(
+                    sprintf(
+                        "Only modes '%s' and '%s' allowed. You gave '%s'.",
+                        AvroFile::READ_MODE,
+                        AvroFile::WRITE_MODE,
+                        $mode
+                    )
+                );
+        }
+    }
+
+ /**
+ * Wraps $io in an AvroDataIOWriter, using a new AvroIODatumWriter built
+ * from $schema.
+ *
+ * @param AvroIO $io
+ * @param AvroSchema $schema
+ * @param string $codec
+ * @returns AvroDataIOWriter
+ */
+ protected static function openWriter($io, $schema, $codec = self::NULL_CODEC)
+ {
+ $writer = new AvroIODatumWriter($schema);
+ return new AvroDataIOWriter($io, $writer, $schema, $codec);
+ }
+
+ /**
+ * Wraps $io in an AvroDataIOReader. $schema is passed to the
+ * AvroIODatumReader as its second constructor argument (the reader's
+ * schema); the first argument (the writer's schema) is passed as null.
+ *
+ * @param AvroIO $io
+ * @param AvroSchema $schema
+ * @returns AvroDataIOReader
+ */
+ protected static function openReader($io, $schema)
+ {
+ $reader = new AvroIODatumReader(null, $schema);
+ return new AvroDataIOReader($io, $reader);
+ }
+
+    /**
+     * Checks $codec against the list returned by validCodecs().
+     *
+     * The comparison is strict: with a loose in_array() a non-string value
+     * such as true (or 0 on PHP 7) compares equal to a codec name and
+     * would be reported as valid.
+     *
+     * @param string $codec
+     * @returns boolean true if $codec is a valid codec value and false otherwise
+     */
+    public static function isValidCodec($codec)
+    {
+        return in_array($codec, self::validCodecs(), true);
+    }
+
+ /**
+ * @returns array array of valid codecs (the contents of self::$validCodecs)
+ */
+ public static function validCodecs()
+ {
+ return self::$validCodecs;
+ }
+}
diff --git a/lang/php/test/IODatumReaderTest.php b/lang/php/lib/DataFile/AvroDataIOException.php
similarity index 64%
copy from lang/php/test/IODatumReaderTest.php
copy to lang/php/lib/DataFile/AvroDataIOException.php
index 7e1e7d9..25bcb2f 100644
--- a/lang/php/test/IODatumReaderTest.php
+++ b/lang/php/lib/DataFile/AvroDataIOException.php
@@ -1,4 +1,5 @@
<?php
+
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -17,20 +18,14 @@
* limitations under the License.
*/
-require_once('test_helper.php');
+namespace Apache\Avro\DataFile;
-class IODatumReaderTest extends PHPUnit\Framework\TestCase
-{
+use Apache\Avro\AvroException;
- public function testSchemaMatching()
- {
- $writers_schema = <<<JSON
- { "type": "map",
- "values": "bytes" }
-JSON;
- $readers_schema = $writers_schema;
- $this->assertTrue(AvroIODatumReader::schemas_match(
- AvroSchema::parse($writers_schema),
- AvroSchema::parse($readers_schema)));
- }
+/**
+ * Raised when something unkind happens with respect to AvroDataIO.
+ * @package Avro
+ */
+class AvroDataIOException extends AvroException
+{
}
diff --git a/lang/php/lib/DataFile/AvroDataIOReader.php b/lang/php/lib/DataFile/AvroDataIOReader.php
new file mode 100644
index 0000000..0dc4dd0
--- /dev/null
+++ b/lang/php/lib/DataFile/AvroDataIOReader.php
@@ -0,0 +1,249 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\DataFile;
+
+use Apache\Avro\AvroException;
+use Apache\Avro\AvroIO;
+use Apache\Avro\AvroUtil;
+use Apache\Avro\Datum\AvroIOBinaryDecoder;
+use Apache\Avro\Datum\AvroIODatumReader;
+use Apache\Avro\IO\AvroStringIO;
+use Apache\Avro\Schema\AvroSchema;
+
+/**
+ *
+ * Reads Avro data from an AvroIO source using an AvroSchema.
+ * @package Avro
+ */
+class AvroDataIOReader
+{
+ /**
+ * @var string
+ */
+ public $sync_marker;
+ /**
+ * @var array object container metadata
+ */
+ public $metadata;
+ /**
+ * @var AvroIO
+ */
+ private $io;
+ /**
+ * @var AvroIOBinaryDecoder
+ */
+ private $decoder;
+ /**
+ * @var AvroIODatumReader
+ */
+ private $datum_reader;
+ /**
+ * @var int count of items in block
+ */
+ private $block_count;
+
+ /**
+ * @var string|null compression codec named in the container header, or null if the header names none
+ */
+ private $codec;
+
+ /**
+ * Reads the container header from $io, records the codec named in the
+ * header metadata and hands the schema stored in the header to
+ * $datum_reader as its writer's schema.
+ *
+ * @param AvroIO $io source from which to read
+ * @param AvroIODatumReader $datum_reader reader that understands
+ * the data schema
+ * @throws AvroDataIOException if $io is not an instance of AvroIO
+ * or the codec specified in the header
+ * is not supported
+ * @uses readHeader()
+ */
+ public function __construct($io, $datum_reader)
+ {
+
+ if (!($io instanceof AvroIO)) {
+ throw new AvroDataIOException('io must be instance of AvroIO');
+ }
+
+ $this->io = $io;
+ $this->decoder = new AvroIOBinaryDecoder($this->io);
+ $this->datum_reader = $datum_reader;
+ // Populates $this->metadata and $this->sync_marker.
+ $this->readHeader();
+
+ // A missing codec entry is kept as null; only a non-empty value is validated.
+ $codec = $this->metadata[AvroDataIO::METADATA_CODEC_ATTR] ?? null;
+ if ($codec && !AvroDataIO::isValidCodec($codec)) {
+ throw new AvroDataIOException(sprintf('Unknown codec: %s', $codec));
+ }
+ $this->codec = $codec;
+
+ $this->block_count = 0;
+ // FIXME: Seems unsanitary to set writers_schema here.
+ // Can't constructor take it as an argument?
+ $this->datum_reader->setWritersSchema(
+ AvroSchema::parse($this->metadata[AvroDataIO::METADATA_SCHEMA_ATTR])
+ );
+ }
+
+    /**
+     * Rewinds to the start of the source and reads the object container
+     * header: the magic bytes, the metadata map and the sync marker.
+     * The metadata and sync marker are stored on this instance.
+     *
+     * @throws AvroDataIOException if the file is not an Avro data file.
+     */
+    private function readHeader()
+    {
+        $this->seek(0, AvroIO::SEEK_SET);
+
+        $magicSize = AvroDataIO::magicSize();
+        $magic = $this->read($magicSize);
+        if (strlen($magic) < $magicSize) {
+            throw new AvroDataIOException(
+                'Not an Avro data file: shorter than the Avro magic block'
+            );
+        }
+
+        $expectedMagic = AvroDataIO::magic();
+        if ($expectedMagic != $magic) {
+            throw new AvroDataIOException(
+                sprintf(
+                    'Not an Avro data file: %s does not match %s',
+                    $magic,
+                    $expectedMagic
+                )
+            );
+        }
+
+        // The metadata map is written and read with the same schema.
+        $metadataSchema = AvroDataIO::metadataSchema();
+        $this->metadata = $this->datum_reader->readData(
+            $metadataSchema,
+            $metadataSchema,
+            $this->decoder
+        );
+        $this->sync_marker = $this->read(AvroDataIO::SYNC_SIZE);
+    }
+
+ /**
+ * Forwards to seek() on the underlying AvroIO object.
+ *
+ * @param int $offset
+ * @param int $whence
+ * @returns mixed whatever the underlying seek() returns
+ * @uses AvroIO::seek()
+ */
+ private function seek($offset, $whence)
+ {
+ return $this->io->seek($offset, $whence);
+ }
+
+ /**
+ * Forwards to read() on the underlying AvroIO object.
+ *
+ * @param int $len count of bytes to read
+ * @returns mixed whatever the underlying read() returns
+ * @uses AvroIO::read()
+ */
+ private function read($len)
+ {
+ return $this->io->read($len);
+ }
+
+    /**
+     * Reads every remaining datum from the object container.
+     *
+     * Blocks are consumed one after another. For a compressed codec the
+     * block payload is read from the container, decompressed, and the
+     * datums are then decoded from the decompressed bytes; otherwise they
+     * are decoded straight from the container.
+     *
+     * @internal Would be nice to implement data() as an iterator, I think
+     * @returns array of data from object container.
+     * @throws AvroException if the PHP extension needed by the codec is
+     *                       not loaded
+     * @throws AvroDataIOException if a block cannot be decompressed or a
+     *                             snappy block fails its CRC32 check
+     */
+    public function data()
+    {
+        $data = [];
+        $decoder = $this->decoder;
+        while (true) {
+            if (0 == $this->block_count) {
+                if ($this->isEof()) {
+                    break;
+                }
+
+                if ($this->skipSync()) {
+                    if ($this->isEof()) {
+                        break;
+                    }
+                }
+
+                $length = $this->readBlockHeader();
+                $datum = $this->readCompressedBlock($length);
+                if (null !== $datum) {
+                    $decoder = new AvroIOBinaryDecoder(new AvroStringIO($datum));
+                }
+            }
+            $data[] = $this->datum_reader->read($decoder);
+            --$this->block_count;
+        }
+        return $data;
+    }
+
+    /**
+     * Reads and decompresses one block payload according to $this->codec.
+     *
+     * The payload is always read through $this->decoder (the container),
+     * never through a decoder created for an earlier block.
+     *
+     * @param int $length length in bytes of the block payload
+     * @returns string|null the decompressed bytes, or null when the codec
+     *          stores blocks uncompressed (nothing is read in that case)
+     * @throws AvroException if the required PHP extension is not loaded
+     * @throws AvroDataIOException if decompression or the snappy CRC32
+     *                             check fails
+     */
+    private function readCompressedBlock($length)
+    {
+        $codec = $this->codec;
+
+        if ($codec === AvroDataIO::DEFLATE_CODEC) {
+            $datum = gzinflate($this->decoder->read($length));
+        } elseif ($codec === AvroDataIO::ZSTANDARD_CODEC) {
+            if (!extension_loaded('zstd')) {
+                throw new AvroException('Please install ext-zstd to use zstandard compression.');
+            }
+            $datum = zstd_uncompress($this->decoder->read($length));
+        } elseif ($codec === AvroDataIO::SNAPPY_CODEC) {
+            if (!extension_loaded('snappy')) {
+                throw new AvroException('Please install ext-snappy to use snappy compression.');
+            }
+            $compressed = $this->decoder->read($length);
+            // The last four bytes are the big-endian CRC32 of the
+            // uncompressed data (see how writeBlock() packs it).
+            if (strlen($compressed) < 4) {
+                throw new AvroDataIOException('Snappy block is too short to hold its CRC32 checksum.');
+            }
+            $crc32 = unpack('N', substr($compressed, -4))[1];
+            $datum = snappy_uncompress(substr($compressed, 0, -4));
+            if (is_string($datum) && $crc32 !== crc32($datum)) {
+                throw new AvroDataIOException('Snappy block failed its CRC32 check.');
+            }
+        } elseif ($codec === AvroDataIO::BZIP2_CODEC) {
+            if (!extension_loaded('bz2')) {
+                throw new AvroException('Please install ext-bz2 to use bzip2 compression.');
+            }
+            $datum = bzdecompress($this->decoder->read($length));
+        } else {
+            return null;
+        }
+
+        // The decompression functions signal failure with a non-string value.
+        if (!is_string($datum)) {
+            throw new AvroDataIOException(sprintf('Unable to decompress %s block.', $codec));
+        }
+
+        return $datum;
+    }
+
+ /**
+ * Forwards to isEof() on the underlying AvroIO object.
+ *
+ * @returns mixed whatever the underlying isEof() returns
+ * @uses AvroIO::isEof()
+ */
+ private function isEof()
+ {
+ return $this->io->isEof();
+ }
+
+    /**
+     * Consumes the sync marker if it is the next thing in the source.
+     *
+     * Reads AvroDataIO::SYNC_SIZE bytes and compares them, byte for byte,
+     * with the sync marker from the header. On a mismatch the bytes that
+     * were actually read are un-read again by seeking backwards.
+     *
+     * @returns boolean true if the sync marker was found and skipped,
+     *          false otherwise
+     */
+    private function skipSync()
+    {
+        $proposed_sync_marker = $this->read(AvroDataIO::SYNC_SIZE);
+        if ($proposed_sync_marker === $this->sync_marker) {
+            return true;
+        }
+
+        // Rewind by what was really consumed: a short read must not move
+        // the position to before where it started.
+        $consumed = strlen((string) $proposed_sync_marker);
+        $this->seek(-$consumed, AvroIO::SEEK_CUR);
+        return false;
+    }
+
+ /**
+ * Reads the block header (which includes the count of items in the block
+ * and the length in bytes of the block). The item count is stored in
+ * $this->block_count; the length is returned.
+ * @returns int length in bytes of the block.
+ */
+ private function readBlockHeader()
+ {
+ // Order matters: the count comes first, then the byte length.
+ $this->block_count = $this->decoder->readLong();
+ return $this->decoder->readLong();
+ }
+
+ /**
+ * Closes this reader (and its AvroIO object.)
+ * @returns mixed value of $io->close()
+ * @uses AvroIO::close()
+ */
+ public function close()
+ {
+ return $this->io->close();
+ }
+}
diff --git a/lang/php/lib/DataFile/AvroDataIOWriter.php b/lang/php/lib/DataFile/AvroDataIOWriter.php
new file mode 100644
index 0000000..d691425
--- /dev/null
+++ b/lang/php/lib/DataFile/AvroDataIOWriter.php
@@ -0,0 +1,237 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\DataFile;
+
+use Apache\Avro\AvroException;
+use Apache\Avro\AvroIO;
+use Apache\Avro\Datum\AvroIOBinaryEncoder;
+use Apache\Avro\Datum\AvroIODatumReader;
+use Apache\Avro\Datum\AvroIODatumWriter;
+use Apache\Avro\IO\AvroStringIO;
+use Apache\Avro\Schema\AvroSchema;
+
+/**
+ * Writes Avro data to an AvroIO source using an AvroSchema
+ * @package Avro
+ */
+class AvroDataIOWriter
+{
+ /**
+ * @var AvroIO object container where data is written
+ */
+ private $io;
+ /**
+ * @var AvroIOBinaryEncoder encoder for object container
+ */
+ private $encoder;
+ /**
+ * @var AvroIODatumWriter
+ */
+ private $datum_writer;
+ /**
+ * @var AvroStringIO buffer for writing
+ */
+ private $buffer;
+ /**
+ * @var AvroIOBinaryEncoder encoder for buffer
+ */
+ private $buffer_encoder;
+ /**
+ * @var int count of items written to block
+ */
+ private $block_count; // AvroIOBinaryEncoder
+ /**
+ * @var array map of object container metadata
+ */
+ private $metadata;
+ /**
+ * @var string compression codec
+ */
+ private $codec;
+
+    /**
+     * @var string sync marker written after the header and after every
+     *             block. Declared explicitly (and kept public) because the
+     *             constructor used to create it as a dynamic property.
+     */
+    public $sync_marker;
+
+    /**
+     * Creates a writer that either starts a new object container or
+     * appends to an existing one.
+     *
+     * When $writers_schema is given, the codec is validated, a new sync
+     * marker is generated and the container header is written to $io.
+     * When it is not given, the header of the existing container in $io is
+     * read with an AvroDataIOReader, its sync marker, codec and schema are
+     * reused, and $io is positioned at its end.
+     *
+     * @param AvroIO $io
+     * @param AvroIODatumWriter $datum_writer
+     * @param AvroSchema $writers_schema
+     * @param string $codec
+     * @throws AvroDataIOException if $io is not an instance of AvroIO or
+     *                             $codec is not a valid codec
+     */
+    public function __construct($io, $datum_writer, $writers_schema = null, $codec = AvroDataIO::NULL_CODEC)
+    {
+        if (!($io instanceof AvroIO)) {
+            throw new AvroDataIOException('io must be instance of AvroIO');
+        }
+
+        $this->io = $io;
+        $this->encoder = new AvroIOBinaryEncoder($this->io);
+        $this->datum_writer = $datum_writer;
+        $this->buffer = new AvroStringIO();
+        $this->buffer_encoder = new AvroIOBinaryEncoder($this->buffer);
+        $this->block_count = 0;
+        $this->metadata = [];
+
+        if ($writers_schema) {
+            if (!AvroDataIO::isValidCodec($codec)) {
+                throw new AvroDataIOException(
+                    sprintf('codec %s is not supported', $codec)
+                );
+            }
+
+            $this->sync_marker = self::generateSyncMarker();
+            $this->metadata[AvroDataIO::METADATA_CODEC_ATTR] = $this->codec = $codec;
+            $this->metadata[AvroDataIO::METADATA_SCHEMA_ATTR] = (string) $writers_schema;
+            $this->writeHeader();
+        } else {
+            $dfr = new AvroDataIOReader($this->io, new AvroIODatumReader());
+            $this->sync_marker = $dfr->sync_marker;
+            // A header without a codec entry means the null codec; fall
+            // back to it instead of reading an undefined array key.
+            $this->codec = $dfr->metadata[AvroDataIO::METADATA_CODEC_ATTR] ?? AvroDataIO::NULL_CODEC;
+            $this->metadata[AvroDataIO::METADATA_CODEC_ATTR] = $this->codec;
+            $schema_from_file = $dfr->metadata[AvroDataIO::METADATA_SCHEMA_ATTR];
+            $this->metadata[AvroDataIO::METADATA_SCHEMA_ATTR] = $schema_from_file;
+            $this->datum_writer->writersSchema = AvroSchema::parse($schema_from_file);
+            $this->seek(0, SEEK_END);
+        }
+    }
+
+ /**
+ * Builds a sync marker by packing eight random 16-bit values into a
+ * binary string (8 x 2 bytes, i.e. AvroDataIO::SYNC_SIZE bytes).
+ *
+ * @returns string a new, unique sync marker.
+ */
+ private static function generateSyncMarker()
+ {
+ // From https://php.net/manual/en/function.mt-rand.php comments
+ // NOTE(review): the forced 0x4000 / 0x8000 bits look like UUID-style
+ // version/variant bits carried over from that snippet -- confirm.
+ return pack(
+ 'S8',
+ random_int(0, 0xffff),
+ random_int(0, 0xffff),
+ random_int(0, 0xffff),
+ random_int(0, 0xffff) | 0x4000,
+ random_int(0, 0xffff) | 0x8000,
+ random_int(0, 0xffff),
+ random_int(0, 0xffff),
+ random_int(0, 0xffff)
+ );
+ }
+
+ /**
+ * Writes the header of the AvroIO object container: the magic bytes,
+ * then the metadata map (encoded with AvroDataIO::metadataSchema()),
+ * then the sync marker.
+ */
+ private function writeHeader()
+ {
+ $this->write(AvroDataIO::magic());
+ $this->datum_writer->writeData(
+ AvroDataIO::metadataSchema(),
+ $this->metadata,
+ $this->encoder
+ );
+ $this->write($this->sync_marker);
+ }
+
+ /**
+ * Forwards to write() on the underlying AvroIO object.
+ *
+ * @param string $bytes
+ * @returns mixed whatever the underlying write() returns
+ * @uses AvroIO::write()
+ */
+ private function write($bytes)
+ {
+ return $this->io->write($bytes);
+ }
+
+ /**
+ * Forwards to seek() on the underlying AvroIO object.
+ *
+ * @param int $offset
+ * @param int $whence
+ * @returns mixed whatever the underlying seek() returns
+ * @uses AvroIO::seek()
+ */
+ private function seek($offset, $whence)
+ {
+ return $this->io->seek($offset, $whence);
+ }
+
+ /**
+ * Encodes $datum into the in-memory buffer and counts it towards the
+ * current block. Once the buffer length reaches
+ * AvroDataIO::SYNC_INTERVAL the block is written out.
+ *
+ * @param mixed $datum
+ */
+ public function append($datum)
+ {
+ $this->datum_writer->write($datum, $this->buffer_encoder);
+ $this->block_count++;
+
+ if ($this->buffer->length() >= AvroDataIO::SYNC_INTERVAL) {
+ $this->writeBlock();
+ }
+ }
+
+    /**
+     * Writes the buffered datums as one block to the AvroIO object
+     * container: object count, payload length, payload, sync marker.
+     * Does nothing when no datum has been buffered.
+     *
+     * The payload is compressed before anything is written, so a missing
+     * extension or a failed compression does not leave half a block
+     * header in the container.
+     *
+     * @throws AvroException if the PHP extension needed by the codec is
+     *                       not loaded
+     * @throws AvroDataIOException if the payload cannot be compressed
+     */
+    private function writeBlock()
+    {
+        if ($this->block_count <= 0) {
+            return;
+        }
+
+        $to_write = $this->compressBlock((string) $this->buffer);
+
+        $this->encoder->writeLong($this->block_count);
+        $this->encoder->writeLong(strlen($to_write));
+        $this->write($to_write);
+        $this->write($this->sync_marker);
+        $this->buffer->truncate();
+        $this->block_count = 0;
+    }
+
+    /**
+     * Compresses a block payload according to $this->codec.
+     *
+     * @param string $raw uncompressed block payload
+     * @returns string the bytes to store; $raw itself when the codec does
+     *          not compress
+     * @throws AvroException if the required PHP extension is not loaded
+     * @throws AvroDataIOException if the compression function fails
+     */
+    private function compressBlock($raw)
+    {
+        if ($this->codec === AvroDataIO::DEFLATE_CODEC) {
+            $compressed = gzdeflate($raw);
+        } elseif ($this->codec === AvroDataIO::ZSTANDARD_CODEC) {
+            if (!extension_loaded('zstd')) {
+                throw new AvroException('Please install ext-zstd to use zstandard compression.');
+            }
+            $compressed = zstd_compress($raw);
+        } elseif ($this->codec === AvroDataIO::SNAPPY_CODEC) {
+            if (!extension_loaded('snappy')) {
+                throw new AvroException('Please install ext-snappy to use snappy compression.');
+            }
+            $compressed = snappy_compress($raw);
+            if (is_string($compressed)) {
+                // Snappy blocks carry the big-endian CRC32 of the
+                // uncompressed data as a four-byte trailer.
+                $compressed .= pack('N', crc32($raw));
+            }
+        } elseif ($this->codec === AvroDataIO::BZIP2_CODEC) {
+            if (!extension_loaded('bz2')) {
+                throw new AvroException('Please install ext-bz2 to use bzip2 compression.');
+            }
+            $compressed = bzcompress($raw);
+        } else {
+            return $raw;
+        }
+
+        // The compression functions signal failure with a non-string value.
+        if (!is_string($compressed)) {
+            throw new AvroDataIOException(
+                sprintf('Unable to compress block with codec %s.', $this->codec)
+            );
+        }
+
+        return $compressed;
+    }
+
+ /**
+ * Flushes buffer to AvroIO object container and closes it.
+ * @return mixed value of $io->close()
+ * @see AvroIO::close()
+ */
+ public function close()
+ {
+ // flush() writes any pending block before the underlying IO is closed.
+ $this->flush();
+ return $this->io->close();
+ }
+
+ /**
+ * Flushes buffer to AvroIO object container: writes the pending block
+ * (if any) and then flushes the underlying AvroIO object.
+ * @returns mixed value of $io->flush()
+ * @see AvroIO::flush()
+ */
+ private function flush()
+ {
+ $this->writeBlock();
+ return $this->io->flush();
+ }
+}
diff --git a/lang/php/lib/Datum/AvroIOBinaryDecoder.php b/lang/php/lib/Datum/AvroIOBinaryDecoder.php
new file mode 100644
index 0000000..8a6a3a2
--- /dev/null
+++ b/lang/php/lib/Datum/AvroIOBinaryDecoder.php
@@ -0,0 +1,266 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Datum;
+
+// @todo Implement JSON encoding, as is required by the Avro spec.
+use Apache\Avro\Avro;
+use Apache\Avro\AvroException;
+use Apache\Avro\AvroGMP;
+use Apache\Avro\AvroIO;
+
+/**
+ * Decodes and reads Avro data from an AvroIO object encoded using
+ * Avro binary encoding.
+ *
+ * @package Avro
+ */
+class AvroIOBinaryDecoder
+{
+
+ /**
+ * @var AvroIO
+ */
+ private $io;
+
+ /**
+ * Runs Avro::checkPlatform() and stores the AvroIO object to read from.
+ *
+ * @param AvroIO $io object from which to read.
+ */
+ public function __construct($io)
+ {
+ Avro::checkPlatform();
+ $this->io = $io;
+ }
+
+ /**
+ * Returns null without reading anything from the AvroIO object.
+ *
+ * @returns null
+ */
+ public function readNull()
+ {
+ return null;
+ }
+
+ /**
+ * Reads one byte; the result is true only when that byte has value 1.
+ *
+ * @returns boolean
+ */
+ public function readBoolean()
+ {
+ return (bool) (1 == ord($this->nextByte()));
+ }
+
+ /**
+ * @returns string the next byte from $this->io.
+ * @throws AvroException if the next byte cannot be read.
+ * NOTE(review): nothing is checked here, so any exception would have to
+ * come from the underlying read() -- confirm.
+ */
+ private function nextByte()
+ {
+ return $this->read(1);
+ }
+
+ /**
+ * Forwards to read() on the underlying AvroIO object.
+ *
+ * @param int $len count of bytes to read
+ * @returns string
+ */
+ public function read($len)
+ {
+ return $this->io->read($len);
+ }
+
+ /**
+ * Reads a value with readLong() and casts the result to int.
+ *
+ * @returns int
+ */
+ public function readInt()
+ {
+ return (int) $this->readLong();
+ }
+
+    /**
+     * Reads one variable-length encoded long: bytes are collected until
+     * one arrives without its high bit (0x80) set, then the collected
+     * bytes are decoded with AvroGMP when Avro::usesGmp() is true and with
+     * self::decodeLongFromArray() otherwise.
+     *
+     * @returns string|int
+     */
+    public function readLong()
+    {
+        $bytes = [];
+        do {
+            $current = ord($this->nextByte());
+            $bytes[] = $current;
+        } while (0 != ($current & 0x80));
+
+        return Avro::usesGmp()
+            ? AvroGMP::decodeLongFromArray($bytes)
+            : self::decodeLongFromArray($bytes);
+    }
+
+    /**
+     * Decodes a variable-length, zig-zag encoded long.
+     *
+     * The low seven bits of each byte are assembled, least significant
+     * group first, while the high bit (0x80) marks that another byte
+     * follows. The assembled value is then zig-zag decoded.
+     *
+     * @param int[] $bytes array of byte ascii values
+     * @returns int decoded value
+     * @internal Requires 64-bit platform
+     */
+    public static function decodeLongFromArray($bytes)
+    {
+        $b = array_shift($bytes);
+        $n = $b & 0x7f;
+        $shift = 7;
+        while (0 != ($b & 0x80)) {
+            $b = array_shift($bytes);
+            $n |= (($b & 0x7f) << $shift);
+            $shift += 7;
+        }
+
+        // PHP's >> is an arithmetic shift: when bit 63 of $n is set it
+        // copies the sign bit in from the left. Masking with PHP_INT_MAX
+        // clears that bit again, which makes this a logical shift and keeps
+        // values with a magnitude of 2^62 and above correct.
+        return ((($n >> 1) & PHP_INT_MAX) ^ -($n & 1));
+    }
+
+ /**
+ * Reads four bytes and decodes them with intBitsToFloat().
+ *
+ * @returns float
+ */
+ public function readFloat()
+ {
+ return self::intBitsToFloat($this->read(4));
+ }
+
+    /**
+     * Performs decoding of the binary string to a float value.
+     *
+     * The bytes are unpacked with format code 'g', which PHP documents as
+     * a float in little-endian byte order, so the result does not depend
+     * on the byte order of the host.
+     *
+     * @param string $bits
+     * @returns float
+     */
+    public static function intBitsToFloat($bits)
+    {
+        return (float) unpack('g', $bits)[1];
+    }
+
+ /**
+ * Reads eight bytes and decodes them with longBitsToDouble().
+ *
+ * @returns double
+ */
+ public function readDouble()
+ {
+ return self::longBitsToDouble($this->read(8));
+ }
+
+    /**
+     * Performs decoding of the binary string to a double value.
+     *
+     * The bytes are unpacked with format code 'e', which PHP documents as
+     * a double in little-endian byte order, so the result does not depend
+     * on the byte order of the host.
+     *
+     * @param string $bits
+     * @returns float
+     */
+    public static function longBitsToDouble($bits)
+    {
+        $double = unpack('e', $bits);
+        // (float) is the canonical spelling of the (double) cast.
+        return (float) $double[1];
+    }
+
+ /**
+ * A string is encoded as a long followed by that many bytes
+ * of UTF-8 encoded character data. Delegates to readBytes(); no
+ * character-set conversion or validation happens here.
+ * @returns string
+ */
+ public function readString()
+ {
+ return $this->readBytes();
+ }
+
+ /**
+ * Reads a length with readLong() and then reads that many bytes.
+ *
+ * @returns string
+ */
+ public function readBytes()
+ {
+ return $this->read($this->readLong());
+ }
+
+ /**
+ * Does nothing and returns null.
+ *
+ * @returns null
+ */
+ public function skipNull()
+ {
+ return null;
+ }
+
+ /**
+ * Skips the single byte of an encoded boolean.
+ */
+ public function skipBoolean()
+ {
+ return $this->skip(1);
+ }
+
+ /**
+ * Moves the read position forward by $len bytes, relative to the
+ * current position (AvroIO::SEEK_CUR). Returns nothing.
+ *
+ * @param int $len count of bytes to skip
+ * @uses AvroIO::seek()
+ */
+ public function skip($len)
+ {
+ $this->seek($len, AvroIO::SEEK_CUR);
+ }
+
+ /**
+ * Forwards to seek() on the underlying AvroIO object.
+ *
+ * @param int $offset
+ * @param int $whence
+ * @returns boolean true upon success
+ * @uses AvroIO::seek()
+ */
+ private function seek($offset, $whence)
+ {
+ return $this->io->seek($offset, $whence);
+ }
+
+ /**
+ * Skips an encoded int by delegating to skipLong().
+ */
+ public function skipInt()
+ {
+ return $this->skipLong();
+ }
+
+    /**
+     * Skips one variable-length encoded long by reading bytes until one
+     * arrives without its high bit (0x80) set. Returns nothing.
+     */
+    public function skipLong()
+    {
+        do {
+            $current = ord($this->nextByte());
+        } while (0 != ($current & 0x80));
+    }
+
+ /**
+ * Skips the four bytes of an encoded float.
+ */
+ public function skipFloat()
+ {
+ return $this->skip(4);
+ }
+
+ /**
+ * Skips the eight bytes of an encoded double.
+ */
+ public function skipDouble()
+ {
+ return $this->skip(8);
+ }
+
+ /**
+ * Skips an encoded string by delegating to skipBytes().
+ */
+ public function skipString()
+ {
+ return $this->skipBytes();
+ }
+
+ /**
+ * Reads the length prefix with readLong() and skips that many bytes.
+ */
+ public function skipBytes()
+ {
+ return $this->skip($this->readLong());
+ }
+
+ /**
+ * Forwards to tell() on the underlying AvroIO object.
+ * NOTE(review): private and not called by any other method of this
+ * class -- possibly dead code, confirm before removing.
+ *
+ * @returns int position of pointer in AvroIO instance
+ * @uses AvroIO::tell()
+ */
+ private function tell()
+ {
+ return $this->io->tell();
+ }
+}
diff --git a/lang/php/lib/Datum/AvroIOBinaryEncoder.php b/lang/php/lib/Datum/AvroIOBinaryEncoder.php
new file mode 100644
index 0000000..0a42b74
--- /dev/null
+++ b/lang/php/lib/Datum/AvroIOBinaryEncoder.php
@@ -0,0 +1,179 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Datum;
+
+use Apache\Avro\Avro;
+use Apache\Avro\AvroGMP;
+use Apache\Avro\AvroIO;
+
+/**
+ * Encodes and writes Avro data to an AvroIO object using
+ * Avro binary encoding.
+ *
+ * @package Avro
+ */
+class AvroIOBinaryEncoder
+{
+ /**
+ * @var AvroIO
+ */
+ private $io;
+
+ /**
+ * Runs Avro::checkPlatform() and stores the AvroIO object to write to.
+ *
+ * @param AvroIO $io object to which data is to be written.
+ *
+ */
+ public function __construct($io)
+ {
+ Avro::checkPlatform();
+ $this->io = $io;
+ }
+
+ /**
+ * Writes nothing to the AvroIO object and returns null.
+ *
+ * @param null $datum actual value is ignored
+ */
+ public function writeNull($datum)
+ {
+ return null;
+ }
+
+    /**
+     * Writes a single byte: value 1 when $datum is truthy, value 0
+     * otherwise.
+     *
+     * @param boolean $datum
+     */
+    public function writeBoolean($datum)
+    {
+        $this->write(chr($datum ? 1 : 0));
+    }
+
+ /**
+ * Forwards to write() on the underlying AvroIO object; the return
+ * value of that call is discarded.
+ *
+ * @param string $datum
+ */
+ public function write($datum)
+ {
+ $this->io->write($datum);
+ }
+
+ /**
+ * Writes an int by delegating to writeLong().
+ *
+ * @param int $datum
+ */
+ public function writeInt($datum)
+ {
+ $this->writeLong($datum);
+ }
+
+    /**
+     * Encodes $n as a long and writes the resulting bytes. The encoding
+     * is done by AvroGMP when Avro::usesGmp() is true and by
+     * self::encodeLong() otherwise.
+     *
+     * @param int $n
+     */
+    public function writeLong($n)
+    {
+        $encoded = Avro::usesGmp()
+            ? AvroGMP::encodeLong($n)
+            : self::encodeLong($n);
+
+        $this->write($encoded);
+    }
+
+    /**
+     * Encodes a long with zig-zag, variable-length encoding.
+     *
+     * The value is first zig-zag encoded and then emitted seven bits at a
+     * time, least significant group first; every byte except the last has
+     * its high bit (0x80) set.
+     *
+     * @param int|string $n
+     * @returns string long $n encoded as bytes
+     * @internal This relies on 64-bit PHP.
+     */
+    public static function encodeLong($n)
+    {
+        $n = (int) $n;
+        $n = ($n << 1) ^ ($n >> 63);
+        $str = '';
+        while (0 != ($n & ~0x7F)) {
+            $str .= chr(($n & 0x7F) | 0x80);
+            // PHP's >> is an arithmetic shift. When the zig-zag value has
+            // bit 63 set (|n| >= 2^62) it keeps shifting ones in from the
+            // left and the loop never ends. Masking off the seven bits
+            // that were shifted in turns this into a logical shift.
+            $n = ($n >> 7) & (PHP_INT_MAX >> 6);
+        }
+        $str .= chr($n);
+        return $str;
+    }
+
+ /**
+ * Encodes $datum with floatToIntBits() and writes the resulting bytes.
+ *
+ * @param float $datum
+ * @uses self::floatToIntBits()
+ */
+ public function writeFloat($datum)
+ {
+ $this->write(self::floatToIntBits($datum));
+ }
+
+ /**
+ * Performs encoding of the given float value to a binary string
+ *
+ * The value is packed with format code 'g', which PHP documents as a
+ * float in little-endian byte order; that is the little-endian
+ * encoding required by the Avro spec, independent of the host's byte
+ * order.
+ * NOTE(review): earlier versions of this comment relied on
+ * {@link Avro::checkPlatform()} to restrict the library to
+ * little-endian platforms -- confirm whether that check is still
+ * needed for this method.
+ *
+ * @param float $float
+ * @returns string bytes
+ * @see Avro::checkPlatform()
+ */
+ public static function floatToIntBits($float)
+ {
+ return pack('g', (float) $float);
+ }
+
+ /**
+ * Encodes $datum with doubleToLongBits() and writes the resulting bytes.
+ *
+ * @param float $datum
+ * @uses self::doubleToLongBits()
+ */
+ public function writeDouble($datum)
+ {
+ $this->write(self::doubleToLongBits($datum));
+ }
+
+    /**
+     * Performs encoding of the given double value to a binary string
+     *
+     * The value is packed with format code 'e', which PHP documents as a
+     * double in little-endian byte order, so the output does not depend
+     * on the byte order of the host.
+     *
+     * @param double $double
+     * @returns string bytes
+     */
+    public static function doubleToLongBits($double)
+    {
+        // (float) is the canonical spelling of the (double) cast.
+        return pack('e', (float) $double);
+    }
+
+ /**
+ * Writes a string by delegating to writeBytes(); no character-set
+ * conversion happens here.
+ *
+ * @param string $str
+ * @uses self::writeBytes()
+ */
+ public function writeString($str)
+ {
+ $this->writeBytes($str);
+ }
+
+ /**
+ * Writes the byte length of $bytes as a long, followed by the bytes
+ * themselves.
+ *
+ * @param string $bytes
+ */
+ public function writeBytes($bytes)
+ {
+ $this->writeLong(strlen($bytes));
+ $this->write($bytes);
+ }
+}
diff --git a/lang/php/lib/Datum/AvroIODatumReader.php b/lang/php/lib/Datum/AvroIODatumReader.php
new file mode 100644
index 0000000..da65f7d
--- /dev/null
+++ b/lang/php/lib/Datum/AvroIODatumReader.php
@@ -0,0 +1,499 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Datum;
+
+use Apache\Avro\AvroException;
+use Apache\Avro\Schema\AvroSchema;
+
+/**
+ * Handles schema-specific reading of data from the decoder.
+ *
+ * Also handles schema resolution between the reader and writer
+ * schemas (if a writer's schema is provided).
+ *
+ * @package Avro
+ */
+class AvroIODatumReader
+{
+ /**
+ * @var AvroSchema
+ */
+ private $writers_schema;
+ /**
+ * @var AvroSchema
+ */
+ private $readers_schema;
+
+ /**
+ * Stores the schemas used for reading. Both are optional and default to
+ * null.
+ *
+ * @param AvroSchema $writers_schema schema the data was written with
+ * @param AvroSchema $readers_schema schema the caller wants to read with
+ */
+ public function __construct($writers_schema = null, $readers_schema = null)
+ {
+ $this->writers_schema = $writers_schema;
+ $this->readers_schema = $readers_schema;
+ }
+
+ /**
+ * Replaces the writer's schema of this reader.
+ *
+ * NOTE(review): despite its name, the parameter is stored as the
+ * writer's schema; the reader's schema is not touched.
+ *
+ * @param AvroSchema $readers_schema schema to use as the writer's schema
+ */
+ public function setWritersSchema($readers_schema)
+ {
+ $this->writers_schema = $readers_schema;
+ }
+
+ /**
+ * Reads one datum from the decoder using the stored schemas.
+ *
+ * @param AvroIOBinaryDecoder $decoder
+ * @returns mixed the value produced by readData()
+ */
+ public function read($decoder)
+ {
+ // Without an explicit reader's schema, read with the writer's schema.
+ // The fallback is stored on the instance, so it persists across calls.
+ if (is_null($this->readers_schema)) {
+ $this->readers_schema = $this->writers_schema;
+ }
+ return $this->readData(
+ $this->writers_schema,
+ $this->readers_schema,
+ $decoder
+ );
+ }
+
+ /**
+ * Reads one datum, dispatching on the type of the writer's schema.
+ *
+ * @param AvroSchema $writers_schema
+ * @param AvroSchema $readers_schema
+ * @param AvroIOBinaryDecoder $decoder
+ * @returns mixed
+ * @throws AvroIOSchemaMatchException if the schemas do not match
+ * @throws AvroException if the writer's schema type is unknown
+ */
+ public function readData($writers_schema, $readers_schema, $decoder)
+ {
+ if (!self::schemasMatch($writers_schema, $readers_schema)) {
+ throw new AvroIOSchemaMatchException($writers_schema, $readers_schema);
+ }
+
+ // Schema resolution: reader's schema is a union, writer's schema is not
+ // The first branch of the reader's union that matches is used.
+ if (
+ AvroSchema::UNION_SCHEMA == $readers_schema->type()
+ && AvroSchema::UNION_SCHEMA != $writers_schema->type()
+ ) {
+ foreach ($readers_schema->schemas() as $schema) {
+ if (self::schemasMatch($writers_schema, $schema)) {
+ return $this->readData($writers_schema, $schema, $decoder);
+ }
+ }
+ throw new AvroIOSchemaMatchException($writers_schema, $readers_schema);
+ }
+
+ // Primitive types are read straight from the decoder; complex types
+ // go through the read* methods of this class.
+ switch ($writers_schema->type()) {
+ case AvroSchema::NULL_TYPE:
+ return $decoder->readNull();
+ case AvroSchema::BOOLEAN_TYPE:
+ return $decoder->readBoolean();
+ case AvroSchema::INT_TYPE:
+ return $decoder->readInt();
+ case AvroSchema::LONG_TYPE:
+ return $decoder->readLong();
+ case AvroSchema::FLOAT_TYPE:
+ return $decoder->readFloat();
+ case AvroSchema::DOUBLE_TYPE:
+ return $decoder->readDouble();
+ case AvroSchema::STRING_TYPE:
+ return $decoder->readString();
+ case AvroSchema::BYTES_TYPE:
+ return $decoder->readBytes();
+ case AvroSchema::ARRAY_SCHEMA:
+ return $this->readArray($writers_schema, $readers_schema, $decoder);
+ case AvroSchema::MAP_SCHEMA:
+ return $this->readMap($writers_schema, $readers_schema, $decoder);
+ case AvroSchema::UNION_SCHEMA:
+ return $this->readUnion($writers_schema, $readers_schema, $decoder);
+ case AvroSchema::ENUM_SCHEMA:
+ return $this->readEnum($writers_schema, $readers_schema, $decoder);
+ case AvroSchema::FIXED_SCHEMA:
+ return $this->readFixed($writers_schema, $readers_schema, $decoder);
+ case AvroSchema::RECORD_SCHEMA:
+ case AvroSchema::ERROR_SCHEMA:
+ case AvroSchema::REQUEST_SCHEMA:
+ return $this->readRecord($writers_schema, $readers_schema, $decoder);
+ default:
+ throw new AvroException(sprintf(
+ "Cannot read unknown schema type: %s",
+ $writers_schema->type()
+ ));
+ }
+ }
+
+ /**
+ * Checks whether data written with $writers_schema may be read with
+ * $readers_schema.
+ *
+ * Rules applied, in order:
+ * - if either schema is a union the result is true (branches are
+ * resolved when the data is actually read);
+ * - equal primitive types match;
+ * - equal complex types match when the compared attributes agree
+ * (item/value type for arrays and maps, full name for enums and
+ * records, full name and size for fixed);
+ * - differing types match only for the numeric promotions
+ * int -> long/float/double, long -> float/double, float -> double.
+ *
+ * @param AvroSchema $writers_schema
+ * @param AvroSchema $readers_schema
+ * @returns boolean true if the schemas are consistent with
+ * each other and false otherwise.
+ */
+ public static function schemasMatch($writers_schema, $readers_schema)
+ {
+ $writers_schema_type = $writers_schema->type;
+ $readers_schema_type = $readers_schema->type;
+
+ if (
+ AvroSchema::UNION_SCHEMA == $writers_schema_type
+ || AvroSchema::UNION_SCHEMA == $readers_schema_type
+ ) {
+ return true;
+ }
+
+ if ($writers_schema_type == $readers_schema_type) {
+ if (AvroSchema::isPrimitiveType($writers_schema_type)) {
+ return true;
+ }
+
+ switch ($readers_schema_type) {
+ case AvroSchema::MAP_SCHEMA:
+ return self::attributesMatch(
+ $writers_schema->values(),
+ $readers_schema->values(),
+ array(AvroSchema::TYPE_ATTR)
+ );
+ case AvroSchema::ARRAY_SCHEMA:
+ return self::attributesMatch(
+ $writers_schema->items(),
+ $readers_schema->items(),
+ array(AvroSchema::TYPE_ATTR)
+ );
+ case AvroSchema::ENUM_SCHEMA:
+ return self::attributesMatch(
+ $writers_schema,
+ $readers_schema,
+ array(AvroSchema::FULLNAME_ATTR)
+ );
+ case AvroSchema::FIXED_SCHEMA:
+ return self::attributesMatch(
+ $writers_schema,
+ $readers_schema,
+ array(
+ AvroSchema::FULLNAME_ATTR,
+ AvroSchema::SIZE_ATTR
+ )
+ );
+ case AvroSchema::RECORD_SCHEMA:
+ case AvroSchema::ERROR_SCHEMA:
+ return self::attributesMatch(
+ $writers_schema,
+ $readers_schema,
+ array(AvroSchema::FULLNAME_ATTR)
+ );
+ case AvroSchema::REQUEST_SCHEMA:
+ // XXX: This seems wrong
+ return true;
+ // XXX: no default
+ }
+
+ // Same, non-primitive type that none of the cases above handles.
+ return false;
+ }
+
+ // Numeric promotions. These only apply when the two types differ, so
+ // they must be tested outside the equal-types branch; nested inside
+ // it they could never succeed and differing types fell through
+ // without an explicit return value.
+ if (
+ AvroSchema::INT_TYPE == $writers_schema_type
+ && in_array($readers_schema_type, array(
+ AvroSchema::LONG_TYPE,
+ AvroSchema::FLOAT_TYPE,
+ AvroSchema::DOUBLE_TYPE
+ ))
+ ) {
+ return true;
+ }
+
+ if (
+ AvroSchema::LONG_TYPE == $writers_schema_type
+ && in_array($readers_schema_type, array(
+ AvroSchema::FLOAT_TYPE,
+ AvroSchema::DOUBLE_TYPE
+ ))
+ ) {
+ return true;
+ }
+
+ if (
+ AvroSchema::FLOAT_TYPE == $writers_schema_type
+ && AvroSchema::DOUBLE_TYPE == $readers_schema_type
+ ) {
+ return true;
+ }
+
+ return false;
+ }
+
+ /**#@+
+ * @param AvroSchema $writers_schema
+ * @param AvroSchema $readers_schema
+ * @param AvroIOBinaryDecoder $decoder
+ */
+
+ /**
+ * Compares the named attributes of two schemas.
+ *
+ * Each attribute is fetched from both schemas through attribute() and
+ * compared with strict identity; the first difference ends the check.
+ *
+ * @param AvroSchema $schema_one
+ * @param AvroSchema $schema_two
+ * @param string[] $attribute_names names of the attributes to compare
+ *
+ * @returns boolean true if every listed attribute is identical in both
+ * schemas (also when the list is empty), false otherwise.
+ */
+ public static function attributesMatch($schema_one, $schema_two, $attribute_names)
+ {
+ foreach ($attribute_names as $name) {
+ $left = $schema_one->attribute($name);
+ $right = $schema_two->attribute($name);
+ if ($left !== $right) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Reads an array: a sequence of blocks, each introduced by an item
+ * count, terminated by a block with count zero.
+ *
+ * @param AvroSchema $writers_schema
+ * @param AvroSchema $readers_schema
+ * @param AvroIOBinaryDecoder $decoder
+ * @returns array the items of all blocks, in the order they were read
+ */
+ public function readArray($writers_schema, $readers_schema, $decoder)
+ {
+ $items = array();
+ $block_count = $decoder->readLong();
+ // Loose comparison, as in readMap(): the loop must also stop if
+ // readLong() yields a zero that is not a native int (presumably a
+ // numeric string on some code paths -- TODO confirm against decoder).
+ while (0 != $block_count) {
+ if ($block_count < 0) {
+ $block_count = -$block_count;
+ // A negative count is followed by the block size; it is read
+ // only to advance the decoder and is otherwise ignored.
+ $decoder->readLong();
+ }
+ for ($i = 0; $i < $block_count; $i++) {
+ $items [] = $this->readData(
+ $writers_schema->items(),
+ $readers_schema->items(),
+ $decoder
+ );
+ }
+ $block_count = $decoder->readLong();
+ }
+ return $items;
+ }
+
+ /**
+ * Reads a map: blocks of key/value pairs, each block introduced by a
+ * pair count, terminated by a block with count zero. Keys are read as
+ * strings, values via readData() with the schemas' value types.
+ *
+ * @returns array
+ */
+ public function readMap($writers_schema, $readers_schema, $decoder)
+ {
+ $map = array();
+ for ($remaining = $decoder->readLong(); 0 != $remaining; $remaining = $decoder->readLong()) {
+ if ($remaining < 0) {
+ $remaining = -$remaining;
+ // The block size that follows a negative count is skipped.
+ $decoder->readLong();
+ }
+
+ for ($read = 0; $read < $remaining; $read++) {
+ $name = $decoder->readString();
+ $map[$name] = $this->readData(
+ $writers_schema->values(),
+ $readers_schema->values(),
+ $decoder
+ );
+ }
+ }
+ return $map;
+ }
+
+ /**
+ * Reads a union value: a long holding the index of the writer's branch,
+ * followed by data encoded with that branch's schema.
+ *
+ * @returns mixed
+ */
+ public function readUnion($writers_schema, $readers_schema, $decoder)
+ {
+ $schema_index = $decoder->readLong();
+ $selected_writers_schema = $writers_schema->schemaByIndex($schema_index);
+ return $this->readData($selected_writers_schema, $readers_schema, $decoder);
+ }
+
+ /**
+ * Reads an enum value: an int index that is mapped to a symbol of the
+ * writer's schema.
+ *
+ * The reader's schema is consulted, but a symbol it does not contain is
+ * currently returned unchanged (see FIXME below).
+ *
+ * @returns string
+ */
+ public function readEnum($writers_schema, $readers_schema, $decoder)
+ {
+ $symbol_index = $decoder->readInt();
+ $symbol = $writers_schema->symbolByIndex($symbol_index);
+ if (!$readers_schema->hasSymbol($symbol)) {
+ null;
+ } // FIXME: unset wrt schema resolution
+ return $symbol;
+ }
+
+ /**
+ * Reads a fixed value: exactly as many bytes as the writer's schema
+ * size. The reader's schema is not used here.
+ *
+ * @returns string
+ */
+ public function readFixed($writers_schema, $readers_schema, $decoder)
+ {
+ return $decoder->read($writers_schema->size());
+ }
+
+ /**
+ * Reads a record field by field, in the writer's field order.
+ *
+ * Fields known to the reader are decoded into the result; fields only
+ * the writer knows are skipped. Reader fields missing from the writer
+ * are then filled from their default value, if they have one.
+ *
+ * @returns array field name => value
+ */
+ public function readRecord($writers_schema, $readers_schema, $decoder)
+ {
+ $readers_fields = $readers_schema->fieldsHash();
+ $record = array();
+ foreach ($writers_schema->fields() as $writers_field) {
+ $type = $writers_field->type();
+ if (isset($readers_fields[$writers_field->name()])) {
+ $record[$writers_field->name()]
+ = $this->readData(
+ $type,
+ $readers_fields[$writers_field->name()]->type(),
+ $decoder
+ );
+ } else {
+ // Still consume the bytes so the decoder stays aligned.
+ $this->skipData($type, $decoder);
+ }
+ }
+ // Fill in default values
+ // Only needed when the reader has more fields than were just read.
+ if (count($readers_fields) > count($record)) {
+ $writers_fields = $writers_schema->fieldsHash();
+ foreach ($readers_fields as $field_name => $field) {
+ if (!isset($writers_fields[$field_name])) {
+ if ($field->hasDefaultValue()) {
+ $record[$field->name()]
+ = $this->readDefaultValue(
+ $field->type(),
+ $field->defaultValue()
+ );
+ } else {
+ // No default available: the field is left out of the result.
+ null;
+ } // FIXME: unset
+ }
+ }
+ }
+
+ return $record;
+ }
+ /**#@-*/
+
+ /**
+ * Skips one datum of the given writer's schema by delegating to the
+ * decoder's skip method for that schema type.
+ *
+ * NOTE(review): the complex-type cases call skipArray(), skipMap(),
+ * skipUnion(), skipEnum(), skipFixed() and skipRecord() on the decoder
+ * and pass the decoder to itself as second argument -- confirm that
+ * AvroIOBinaryDecoder provides these methods with that signature.
+ *
+ * @param AvroSchema $writers_schema
+ * @param AvroIOBinaryDecoder $decoder
+ * @throws AvroException if the schema type is unknown
+ */
+ private function skipData($writers_schema, $decoder)
+ {
+ switch ($writers_schema->type()) {
+ case AvroSchema::NULL_TYPE:
+ return $decoder->skipNull();
+ case AvroSchema::BOOLEAN_TYPE:
+ return $decoder->skipBoolean();
+ case AvroSchema::INT_TYPE:
+ return $decoder->skipInt();
+ case AvroSchema::LONG_TYPE:
+ return $decoder->skipLong();
+ case AvroSchema::FLOAT_TYPE:
+ return $decoder->skipFloat();
+ case AvroSchema::DOUBLE_TYPE:
+ return $decoder->skipDouble();
+ case AvroSchema::STRING_TYPE:
+ return $decoder->skipString();
+ case AvroSchema::BYTES_TYPE:
+ return $decoder->skipBytes();
+ case AvroSchema::ARRAY_SCHEMA:
+ return $decoder->skipArray($writers_schema, $decoder);
+ case AvroSchema::MAP_SCHEMA:
+ return $decoder->skipMap($writers_schema, $decoder);
+ case AvroSchema::UNION_SCHEMA:
+ return $decoder->skipUnion($writers_schema, $decoder);
+ case AvroSchema::ENUM_SCHEMA:
+ return $decoder->skipEnum($writers_schema, $decoder);
+ case AvroSchema::FIXED_SCHEMA:
+ return $decoder->skipFixed($writers_schema, $decoder);
+ case AvroSchema::RECORD_SCHEMA:
+ case AvroSchema::ERROR_SCHEMA:
+ case AvroSchema::REQUEST_SCHEMA:
+ return $decoder->skipRecord($writers_schema, $decoder);
+ default:
+ throw new AvroException(sprintf(
+ 'Unknown schema type: %s',
+ $writers_schema->type()
+ ));
+ }
+ }
+
+ /**
+ * Converts a default value taken from a schema definition into the
+ * value a reader returns for a field of the given schema.
+ *
+ * Arrays, maps and records are converted recursively; for a union the
+ * default is interpreted with the union's first branch.
+ *
+ * @param AvroSchema $field_schema
+ * @param null|boolean|int|float|string|array $default_value
+ * @returns null|boolean|int|float|string|array
+ *
+ * @throws AvroException if $field_schema type is unknown.
+ */
+ public function readDefaultValue($field_schema, $default_value)
+ {
+ switch ($field_schema->type()) {
+ case AvroSchema::NULL_TYPE:
+ return null;
+ case AvroSchema::BOOLEAN_TYPE:
+ return $default_value;
+ case AvroSchema::INT_TYPE:
+ case AvroSchema::LONG_TYPE:
+ return (int) $default_value;
+ case AvroSchema::FLOAT_TYPE:
+ case AvroSchema::DOUBLE_TYPE:
+ return (float) $default_value;
+ case AvroSchema::STRING_TYPE:
+ case AvroSchema::BYTES_TYPE:
+ return $default_value;
+ case AvroSchema::ARRAY_SCHEMA:
+ $array = array();
+ foreach ($default_value as $json_val) {
+ $val = $this->readDefaultValue($field_schema->items(), $json_val);
+ $array [] = $val;
+ }
+ return $array;
+ case AvroSchema::MAP_SCHEMA:
+ $map = array();
+ foreach ($default_value as $key => $json_val) {
+ $map[$key] = $this->readDefaultValue(
+ $field_schema->values(),
+ $json_val
+ );
+ }
+ return $map;
+ case AvroSchema::UNION_SCHEMA:
+ return $this->readDefaultValue(
+ $field_schema->schemaByIndex(0),
+ $default_value
+ );
+ case AvroSchema::ENUM_SCHEMA:
+ case AvroSchema::FIXED_SCHEMA:
+ return $default_value;
+ case AvroSchema::RECORD_SCHEMA:
+ $record = array();
+ foreach ($field_schema->fields() as $field) {
+ $field_name = $field->name();
+ // Test for the key itself rather than for a truthy value, so
+ // that explicit defaults such as 0, false, '' or an empty
+ // array are kept. Only a missing key falls back to the
+ // field's own default.
+ if (is_array($default_value) && array_key_exists($field_name, $default_value)) {
+ $json_val = $default_value[$field_name];
+ } else {
+ $json_val = $field->defaultValue();
+ }
+
+ $record[$field_name] = $this->readDefaultValue(
+ $field->type(),
+ $json_val
+ );
+ }
+ return $record;
+ default:
+ throw new AvroException(sprintf('Unknown type: %s', $field_schema->type()));
+ }
+ }
+}
diff --git a/lang/php/lib/Datum/AvroIODatumWriter.php b/lang/php/lib/Datum/AvroIODatumWriter.php
new file mode 100644
index 0000000..7556319
--- /dev/null
+++ b/lang/php/lib/Datum/AvroIODatumWriter.php
@@ -0,0 +1,189 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Datum;
+
+use Apache\Avro\AvroException;
+use Apache\Avro\Schema\AvroSchema;
+
+/**
+ * Handles schema-specific writing of data to the encoder.
+ *
+ * Ensures that each datum written is consistent with the writer's schema.
+ *
+ * @package Avro
+ */
+class AvroIODatumWriter
+{
+ /**
+ * Schema used by this instance to write Avro data.
+ * @var AvroSchema
+ */
+ public $writersSchema;
+
+ /**
+ * Stores the schema used for writing; it is optional and defaults to
+ * null.
+ *
+ * @param AvroSchema $writers_schema
+ */
+ public function __construct($writers_schema = null)
+ {
+ $this->writersSchema = $writers_schema;
+ }
+
+ /**
+ * Writes $datum to the encoder using this writer's schema.
+ *
+ * @param $datum
+ * @param AvroIOBinaryEncoder $encoder
+ * @throws AvroIOTypeException if $datum is invalid for the schema
+ */
+ public function write($datum, $encoder)
+ {
+ $this->writeData($this->writersSchema, $datum, $encoder);
+ }
+
+ /**
+ * Validates $datum against $writers_schema and writes it, dispatching
+ * on the schema type.
+ *
+ * @param AvroSchema $writers_schema
+ * @param $datum
+ * @param AvroIOBinaryEncoder $encoder
+ * @returns mixed whatever the selected write method returns
+ *
+ * @throws AvroIOTypeException if $datum is invalid for $writers_schema
+ * @throws AvroException if the schema type is unknown
+ */
+ public function writeData($writers_schema, $datum, $encoder)
+ {
+ if (!AvroSchema::isValidDatum($writers_schema, $datum)) {
+ throw new AvroIOTypeException($writers_schema, $datum);
+ }
+
+ // Primitive types go straight to the encoder; complex types are
+ // handled by the private write* methods of this class.
+ switch ($writers_schema->type()) {
+ case AvroSchema::NULL_TYPE:
+ return $encoder->writeNull($datum);
+ case AvroSchema::BOOLEAN_TYPE:
+ return $encoder->writeBoolean($datum);
+ case AvroSchema::INT_TYPE:
+ return $encoder->writeInt($datum);
+ case AvroSchema::LONG_TYPE:
+ return $encoder->writeLong($datum);
+ case AvroSchema::FLOAT_TYPE:
+ return $encoder->writeFloat($datum);
+ case AvroSchema::DOUBLE_TYPE:
+ return $encoder->writeDouble($datum);
+ case AvroSchema::STRING_TYPE:
+ return $encoder->writeString($datum);
+ case AvroSchema::BYTES_TYPE:
+ return $encoder->writeBytes($datum);
+ case AvroSchema::ARRAY_SCHEMA:
+ return $this->writeArray($writers_schema, $datum, $encoder);
+ case AvroSchema::MAP_SCHEMA:
+ return $this->writeMap($writers_schema, $datum, $encoder);
+ case AvroSchema::FIXED_SCHEMA:
+ return $this->writeFixed($writers_schema, $datum, $encoder);
+ case AvroSchema::ENUM_SCHEMA:
+ return $this->writeEnum($writers_schema, $datum, $encoder);
+ case AvroSchema::RECORD_SCHEMA:
+ case AvroSchema::ERROR_SCHEMA:
+ case AvroSchema::REQUEST_SCHEMA:
+ return $this->writeRecord($writers_schema, $datum, $encoder);
+ case AvroSchema::UNION_SCHEMA:
+ return $this->writeUnion($writers_schema, $datum, $encoder);
+ default:
+ throw new AvroException(sprintf(
+ 'Unknown type: %s',
+ $writers_schema->type
+ ));
+ }
+ }
+
+ /**
+ * Writes an array as at most one block (item count followed by the
+ * items) and a terminating zero count. An empty array is written as
+ * the zero count only.
+ *
+ * @param AvroSchema $writers_schema
+ * @param null|boolean|int|float|string|array $datum item to be written
+ * @param AvroIOBinaryEncoder $encoder
+ */
+ private function writeArray($writers_schema, $datum, $encoder)
+ {
+ $datum_count = count($datum);
+ if (0 < $datum_count) {
+ $encoder->writeLong($datum_count);
+ $items = $writers_schema->items();
+ foreach ($datum as $item) {
+ $this->writeData($items, $item, $encoder);
+ }
+ }
+ return $encoder->writeLong(0);
+ }
+
+ /**
+ * Writes a map as at most one block (pair count followed by key/value
+ * pairs) and a terminating zero count. Keys are written as strings,
+ * values via writeData() with the schema's value type.
+ *
+ * @param $writers_schema
+ * @param $datum
+ * @param $encoder
+ * @throws AvroIOTypeException
+ */
+ private function writeMap($writers_schema, $datum, $encoder)
+ {
+ $datum_count = count($datum);
+ if ($datum_count > 0) {
+ $encoder->writeLong($datum_count);
+ foreach ($datum as $k => $v) {
+ $encoder->writeString($k);
+ $this->writeData($writers_schema->values(), $v, $encoder);
+ }
+ }
+ $encoder->writeLong(0);
+ }
+
+ /**
+ * Writes a fixed value by passing $datum to the encoder's write()
+ * unchanged; no length prefix is written here.
+ */
+ private function writeFixed($writers_schema, $datum, $encoder)
+ {
+ /**
+ * NOTE Unused $writers_schema parameter included for consistency
+ * with the other write* methods of this class.
+ */
+ return $encoder->write($datum);
+ }
+
+ /**
+ * Writes an enum symbol as the int index the schema assigns to it.
+ */
+ private function writeEnum($writers_schema, $datum, $encoder)
+ {
+ $datum_index = $writers_schema->symbolIndex($datum);
+ return $encoder->writeInt($datum_index);
+ }
+
+ /**
+ * Writes a record by writing each field of the schema, in schema order,
+ * taking the value from $datum under the field's name.
+ */
+ private function writeRecord($writers_schema, $datum, $encoder)
+ {
+ foreach ($writers_schema->fields() as $field) {
+ $this->writeData($field->type(), $datum[$field->name()], $encoder);
+ }
+ }
+
+ /**
+ * Writes a union value: the index of the first branch schema for which
+ * $datum is valid, followed by $datum written with that branch.
+ *
+ * @throws AvroIOTypeException if $datum is valid for none of the branches
+ */
+ private function writeUnion($writers_schema, $datum, $encoder)
+ {
+ $datum_schema_index = -1;
+ $datum_schema = null;
+ foreach ($writers_schema->schemas() as $index => $schema) {
+ if (AvroSchema::isValidDatum($schema, $datum)) {
+ $datum_schema_index = $index;
+ $datum_schema = $schema;
+ break;
+ }
+ }
+
+ if (is_null($datum_schema)) {
+ throw new AvroIOTypeException($writers_schema, $datum);
+ }
+
+ $encoder->writeLong($datum_schema_index);
+ $this->writeData($datum_schema, $datum, $encoder);
+ }
+}
diff --git a/lang/php/test/IODatumReaderTest.php b/lang/php/lib/Datum/AvroIOSchemaMatchException.php
similarity index 55%
copy from lang/php/test/IODatumReaderTest.php
copy to lang/php/lib/Datum/AvroIOSchemaMatchException.php
index 7e1e7d9..373addd 100644
--- a/lang/php/test/IODatumReaderTest.php
+++ b/lang/php/lib/Datum/AvroIOSchemaMatchException.php
@@ -1,4 +1,5 @@
<?php
+
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -17,20 +18,31 @@
* limitations under the License.
*/
-require_once('test_helper.php');
+namespace Apache\Avro\Datum;
-class IODatumReaderTest extends PHPUnit\Framework\TestCase
-{
+use Apache\Avro\AvroException;
+use Apache\Avro\Schema\AvroSchema;
- public function testSchemaMatching()
- {
- $writers_schema = <<<JSON
- { "type": "map",
- "values": "bytes" }
-JSON;
- $readers_schema = $writers_schema;
- $this->assertTrue(AvroIODatumReader::schemas_match(
- AvroSchema::parse($writers_schema),
- AvroSchema::parse($readers_schema)));
- }
+/**
+ * Exceptions arising from incompatibility between
+ * reader and writer schemas.
+ *
+ * @package Avro
+ */
+class AvroIOSchemaMatchException extends AvroException
+{
+ /**
+ * Builds the message from both schemas. They are formatted with %s,
+ * so they must be convertible to string.
+ *
+ * @param AvroSchema $writers_schema
+ * @param AvroSchema $readers_schema
+ */
+ public function __construct($writers_schema, $readers_schema)
+ {
+ parent::__construct(
+ sprintf(
+ "Writer's schema %s and Reader's schema %s do not match.",
+ $writers_schema,
+ $readers_schema
+ )
+ );
+ }
}
diff --git a/lang/php/test/IODatumReaderTest.php b/lang/php/lib/Datum/AvroIOTypeException.php
similarity index 59%
copy from lang/php/test/IODatumReaderTest.php
copy to lang/php/lib/Datum/AvroIOTypeException.php
index 7e1e7d9..dc7b585 100644
--- a/lang/php/test/IODatumReaderTest.php
+++ b/lang/php/lib/Datum/AvroIOTypeException.php
@@ -1,4 +1,5 @@
<?php
+
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -17,20 +18,28 @@
* limitations under the License.
*/
-require_once('test_helper.php');
+namespace Apache\Avro\Datum;
-class IODatumReaderTest extends PHPUnit\Framework\TestCase
-{
+use Apache\Avro\AvroException;
+use Apache\Avro\Schema\AvroSchema;
- public function testSchemaMatching()
- {
- $writers_schema = <<<JSON
- { "type": "map",
- "values": "bytes" }
-JSON;
- $readers_schema = $writers_schema;
- $this->assertTrue(AvroIODatumReader::schemas_match(
- AvroSchema::parse($writers_schema),
- AvroSchema::parse($readers_schema)));
- }
+/**
+ * Exceptions arising from writing or reading Avro data.
+ *
+ * @package Avro
+ */
+class AvroIOTypeException extends AvroException
+{
+ /**
+ * Builds the message from a var_export() dump of the datum and the
+ * expected schema. The schema is formatted with %s, so it must be
+ * convertible to string.
+ *
+ * @param AvroSchema $expectedSchema
+ * @param mixed $datum
+ */
+ public function __construct($expectedSchema, $datum)
+ {
+ parent::__construct(sprintf(
+ 'The datum %s is not an example of schema %s',
+ var_export($datum, true),
+ $expectedSchema
+ ));
+ }
}
diff --git a/lang/php/lib/IO/AvroFile.php b/lang/php/lib/IO/AvroFile.php
new file mode 100644
index 0000000..ace97a7
--- /dev/null
+++ b/lang/php/lib/IO/AvroFile.php
@@ -0,0 +1,197 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\IO;
+
+use Apache\Avro\AvroIO;
+
+/**
+ * AvroIO wrapper for PHP file access functions
+ * @package Avro
+ */
+class AvroFile extends AvroIO
+{
+ /**
+ * @var string fopen read mode value. Used internally.
+ */
+ const FOPEN_READ_MODE = 'rb';
+
+ /**
+ * @var string fopen write mode value. Used internally.
+ */
+ const FOPEN_WRITE_MODE = 'wb';
+
+ /**
+ * @var string
+ */
+ private $file_path;
+
+ /**
+ * @var resource file handle for AvroFile instance
+ */
+ private $file_handle;
+
+ /**
+ * Opens the file at $file_path for reading or writing.
+ *
+ * @param string $file_path
+ * @param string $mode self::READ_MODE (default) or self::WRITE_MODE
+ * @throws AvroIOException if the file cannot be opened or the mode is
+ * neither of the two supported values.
+ */
+ public function __construct($file_path, $mode = self::READ_MODE)
+ {
+ /**
+ * XXX: should we check for file existence (in case of reading)
+ * or anything else about the provided file_path argument?
+ */
+ $this->file_path = $file_path;
+ switch ($mode) {
+ case self::WRITE_MODE:
+ $this->file_handle = fopen($this->file_path, self::FOPEN_WRITE_MODE);
+ if (false == $this->file_handle) {
+ throw new AvroIOException('Could not open file for writing');
+ }
+ break;
+ case self::READ_MODE:
+ $this->file_handle = fopen($this->file_path, self::FOPEN_READ_MODE);
+ if (false == $this->file_handle) {
+ throw new AvroIOException('Could not open file for reading');
+ }
+ break;
+ default:
+ throw new AvroIOException(
+ sprintf(
+ "Only modes '%s' and '%s' allowed. You provided '%s'.",
+ self::READ_MODE,
+ self::WRITE_MODE,
+ $mode
+ )
+ );
+ }
+ }
+
+ /**
+ * Writes $str to the file with fwrite().
+ *
+ * @param string $str bytes to write
+ * @returns int count of bytes written
+ * @throws AvroIOException if write failed.
+ */
+ public function write($str)
+ {
+ $len = fwrite($this->file_handle, $str);
+ if (false === $len) {
+ throw new AvroIOException(sprintf('Could not write to file'));
+ }
+ return $len;
+ }
+
+ /**
+ * Reports the position of the file pointer as given by ftell().
+ *
+ * @returns int current position within the file
+ * @throws AvroIOException if tell failed.
+ */
+ public function tell()
+ {
+ $position = ftell($this->file_handle);
+ if (false === $position) {
+ throw new AvroIOException('Could not execute tell on reader');
+ }
+ return $position;
+ }
+
+ /**
+ * Closes the file handle with fclose().
+ * @returns boolean true if successful.
+ * @throws AvroIOException if there was an error closing the file.
+ */
+ public function close()
+ {
+ $res = fclose($this->file_handle);
+ if (false === $res) {
+ throw new AvroIOException('Error closing file.');
+ }
+ return $res;
+ }
+
+ /**
+ * Tests for end of file by probing: one byte is read, and if feof()
+ * then reports end of file the result is true; otherwise the pointer
+ * is moved back by one byte and the result is false.
+ *
+ * @returns boolean true if the pointer is at the end of the file,
+ * and false otherwise.
+ * @see AvroIO::isEof() as behavior differs from feof()
+ */
+ public function isEof()
+ {
+ $this->read(1);
+ if (feof($this->file_handle)) {
+ return true;
+ }
+ // Undo the probe read so the caller sees an unchanged position.
+ $this->seek(-1, self::SEEK_CUR);
+ return false;
+ }
+
+ /**
+ * Reads up to $len bytes from the file with fread().
+ *
+ * @param int $len count of bytes to read.
+ * @returns string bytes read; the empty string when $len is 0
+ * @throws AvroIOException if length value is negative or if the read failed
+ */
+ public function read($len)
+ {
+ if (0 > $len) {
+ throw new AvroIOException(
+ sprintf("Invalid length value passed to read: %d", $len)
+ );
+ }
+
+ // A zero-length request returns early and never reaches fread().
+ if (0 == $len) {
+ return '';
+ }
+
+ $bytes = fread($this->file_handle, $len);
+ if (false === $bytes) {
+ throw new AvroIOException('Could not read from file');
+ }
+ return $bytes;
+ }
+
+ /**
+ * Moves the file pointer with fseek().
+ *
+ * @param int $offset
+ * @param int $whence one of the SEEK_* values accepted by fseek();
+ * defaults to SEEK_SET
+ * @returns boolean true upon success
+ * @throws AvroIOException if seek failed.
+ * @see AvroIO::seek()
+ */
+ public function seek($offset, $whence = SEEK_SET): bool
+ {
+ $res = fseek($this->file_handle, $offset, $whence);
+ // Note: does not catch seeking beyond end of file
+ if (-1 === $res) {
+ throw new AvroIOException(
+ sprintf(
+ "Could not execute seek (offset = %d, whence = %d)",
+ $offset,
+ $whence
+ )
+ );
+ }
+ return true;
+ }
+
+ /**
+ * Flushes buffered output to the file with fflush().
+ *
+ * @returns boolean true if the flush was successful.
+ * @throws AvroIOException if there was an error flushing the file.
+ */
+ public function flush()
+ {
+ $res = fflush($this->file_handle);
+ if (false === $res) {
+ throw new AvroIOException('Could not flush file.');
+ }
+ return true;
+ }
+}
diff --git a/lang/php/test/IODatumReaderTest.php b/lang/php/lib/IO/AvroIOException.php
similarity index 64%
copy from lang/php/test/IODatumReaderTest.php
copy to lang/php/lib/IO/AvroIOException.php
index 7e1e7d9..e4d531d 100644
--- a/lang/php/test/IODatumReaderTest.php
+++ b/lang/php/lib/IO/AvroIOException.php
@@ -1,4 +1,5 @@
<?php
+
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -17,20 +18,14 @@
* limitations under the License.
*/
-require_once('test_helper.php');
+namespace Apache\Avro\IO;
-class IODatumReaderTest extends PHPUnit\Framework\TestCase
-{
+use Apache\Avro\AvroException;
- public function testSchemaMatching()
- {
- $writers_schema = <<<JSON
- { "type": "map",
- "values": "bytes" }
-JSON;
- $readers_schema = $writers_schema;
- $this->assertTrue(AvroIODatumReader::schemas_match(
- AvroSchema::parse($writers_schema),
- AvroSchema::parse($readers_schema)));
- }
+/**
+ * Exceptions associated with AvroIO instances.
+ *
+ * Adds no members of its own; it only gives IO failures a distinct
+ * exception type derived from AvroException.
+ * @package Avro
+ */
+class AvroIOException extends AvroException
+{
}
diff --git a/lang/php/lib/IO/AvroStringIO.php b/lang/php/lib/IO/AvroStringIO.php
new file mode 100644
index 0000000..9b98ff3
--- /dev/null
+++ b/lang/php/lib/IO/AvroStringIO.php
@@ -0,0 +1,252 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\IO;
+
+use Apache\Avro\AvroIO;
+
+/**
+ * AvroIO wrapper for string access
+ * @package Avro
+ */
+class AvroStringIO extends AvroIO
+{
+ /**
+ * @var string
+ */
+ private $string_buffer;
+ /**
+ * @var int current position in string
+ */
+ private $current_index;
+ /**
+ * @var boolean whether or not the string is closed.
+ */
+ private $is_closed;
+
+ /**
+ * Creates an open buffer holding $str with the position set to 0.
+ *
+ * @param string $str initial value of AvroStringIO buffer. Regardless
+ * of the initial value, the pointer is set to the
+ * beginning of the buffer.
+ * @throws AvroIOException if a non-string value is passed as $str
+ */
+ public function __construct($str = '')
+ {
+ $this->is_closed = false;
+ $this->string_buffer = '';
+ $this->current_index = 0;
+
+ if (is_string($str)) {
+ $this->string_buffer .= $str;
+ } else {
+ throw new AvroIOException(
+ sprintf('constructor argument must be a string: %s', gettype($str))
+ );
+ }
+ }
+
+ /**
+ * Append bytes to this buffer.
+ * (Nothing more is needed to support Avro.)
+ * @param string $arg bytes to write
+ * @returns int count of bytes written.
+ * @throws AvroIOException if $arg is not a string value or the buffer
+ * is closed.
+ */
+ public function write($arg)
+ {
+ $this->checkClosed();
+ if (is_string($arg)) {
+ return $this->appendStr($arg);
+ }
+ throw new AvroIOException(
+ sprintf(
+ 'write argument must be a string: (%s) %s',
+ gettype($arg),
+ var_export($arg, true)
+ )
+ );
+ }
+
+ /**
+ * Guard used by the buffer operations: does nothing while the buffer
+ * is open.
+ *
+ * @throws AvroIOException if the buffer is closed.
+ */
+ private function checkClosed()
+ {
+ if ($this->isClosed()) {
+ throw new AvroIOException('Buffer is closed');
+ }
+ }
+
+ /**
+ * Reports the closed flag of this buffer.
+ *
+ * @returns boolean true if this buffer is closed and false
+ * otherwise.
+ */
+ public function isClosed()
+ {
+ return $this->is_closed;
+ }
+
+ /**
+ * Appends bytes to the end of this buffer and advances the current
+ * position by the number of bytes appended.
+ * @param string $str
+ * @returns integer count of bytes written.
+ * @throws AvroIOException if the buffer is closed.
+ */
+ private function appendStr($str)
+ {
+ $this->checkClosed();
+ $this->string_buffer .= $str;
+ $len = strlen($str);
+ $this->current_index += $len;
+ return $len;
+ }
+
+ /**
+ * Reads up to $len bytes starting at the current position.
+ *
+ * If fewer than $len bytes are available the position is moved to the
+ * end of the buffer; otherwise it advances by $len.
+ *
+ * @param int $len count of bytes to read
+ * @returns string bytes read from buffer
+ * @throws AvroIOException if the buffer is closed.
+ * @todo test for fencepost errors wrt updating current_index
+ */
+ public function read($len)
+ {
+ $this->checkClosed();
+ $read = '';
+ // Collect byte by byte; offsets that do not exist contribute nothing.
+ for ($i = $this->current_index; $i < ($this->current_index + $len); $i++) {
+ $read .= $this->string_buffer[$i] ?? '';
+ }
+ if (strlen($read) < $len) {
+ $this->current_index = $this->length();
+ } else {
+ $this->current_index += $len;
+ }
+ return $read;
+ }
+
+ /**
+ * Reports the size of the buffer in bytes (strlen of the buffer).
+ *
+ * @returns int count of bytes in the buffer
+ * @internal Could probably memoize length for performance, but
+ * no need do this yet.
+ */
+ public function length()
+ {
+ return strlen($this->string_buffer);
+ }
+
+ /**
+ * Moves the current position within the buffer.
+ *
+ * @param int $offset position or displacement, depending on $whence
+ * @param int $whence self::SEEK_SET (absolute, default), self::SEEK_CUR
+ * (relative to the current position) or
+ * self::SEEK_END (relative to the end of the buffer)
+ * @returns boolean true if successful
+ * @throws AvroIOException if $offset is not an integer, if the target
+ * lies before the beginning of the buffer, or
+ * if $whence is not one of the supported values.
+ */
+ public function seek($offset, $whence = self::SEEK_SET): bool
+ {
+ if (!is_int($offset)) {
+ throw new AvroIOException('Seek offset must be an integer.');
+ }
+ // Prevent seeking before BOF
+ switch ($whence) {
+ case self::SEEK_SET:
+ if (0 > $offset) {
+ throw new AvroIOException('Cannot seek before beginning of file.');
+ }
+ $this->current_index = $offset;
+ break;
+ case self::SEEK_CUR:
+ // The bound must be checked against the requested offset; adding
+ // $whence here would never detect a seek before the beginning.
+ if (0 > $this->current_index + $offset) {
+ throw new AvroIOException('Cannot seek before beginning of file.');
+ }
+ $this->current_index += $offset;
+ break;
+ case self::SEEK_END:
+ if (0 > $this->length() + $offset) {
+ throw new AvroIOException('Cannot seek before beginning of file.');
+ }
+ $this->current_index = $this->length() + $offset;
+ break;
+ default:
+ throw new AvroIOException(sprintf('Invalid seek whence %d', $whence));
+ }
+
+ return true;
+ }
+
+ /**
+ * Reports the current position within the buffer.
+ *
+ * @returns int
+ * @see AvroIO::tell()
+ */
+ public function tell()
+ {
+ return $this->current_index;
+ }
+
+ /**
+ * Reports whether the current position is at or beyond the end of the
+ * buffer.
+ *
+ * @returns boolean
+ * @see AvroIO::isEof()
+ */
+ public function isEof()
+ {
+ return ($this->current_index >= $this->length());
+ }
+
+ /**
+ * No-op provided for compatibility with AvroIO interface; there is
+ * nothing to flush for an in-memory string.
+ * @returns boolean true
+ */
+ public function flush()
+ {
+ return true;
+ }
+
+ /**
+ * Marks this buffer as closed.
+ * @returns boolean true
+ * @throws AvroIOException if the buffer is already closed.
+ */
+ public function close()
+ {
+ $this->checkClosed();
+ $this->is_closed = true;
+ return true;
+ }
+
+ /**
+ * Truncates the buffer to 0 bytes and returns the pointer
+ * to the beginning of the buffer.
+ * @returns boolean true
+ * @throws AvroIOException if the buffer is closed.
+ */
+ public function truncate()
+ {
+ $this->checkClosed();
+ $this->string_buffer = '';
+ $this->current_index = 0;
+ return true;
+ }
+
+ /**
+ * Returns the buffer contents by casting this object to string.
+ *
+ * @returns string
+ * @uses self::__toString()
+ */
+ public function string()
+ {
+ return (string) $this;
+ }
+
+ /**
+ * Returns the whole buffer, independent of the current position.
+ *
+ * @returns string
+ */
+ public function __toString()
+ {
+ return $this->string_buffer;
+ }
+}
diff --git a/lang/php/lib/Protocol/AvroProtocol.php b/lang/php/lib/Protocol/AvroProtocol.php
new file mode 100644
index 0000000..c05b2ea
--- /dev/null
+++ b/lang/php/lib/Protocol/AvroProtocol.php
@@ -0,0 +1,66 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Protocol;
+
+use Apache\Avro\Schema\AvroNamedSchemata;
+use Apache\Avro\Schema\AvroSchema;
+
+/**
+ * Avro library for protocols
+ * @package Avro
+ */
+class AvroProtocol
+{
+    /**
+     * @var string|null value of the "protocol" attribute (same value as $name)
+     */
+    public $protocol;
+    /**
+     * @var string|null protocol name, taken from the "protocol" attribute
+     */
+    public $name;
+    /**
+     * @var string|null value of the "namespace" attribute, if any
+     */
+    public $namespace;
+    /**
+     * @var AvroNamedSchemata named schemata collected while parsing "types"
+     */
+    public $schemata;
+    /**
+     * @var AvroProtocolMessage[]|null messages keyed by message name; stays
+     *      unset when the protocol declares no "messages" attribute
+     */
+    public $messages;
+
+    /**
+     * Builds an AvroProtocol from its JSON representation.
+     *
+     * @param string $json JSON-encoded protocol declaration
+     * @returns AvroProtocol
+     * @throws AvroProtocolParseException if $json is null or does not decode
+     *                                    to a JSON object/array
+     */
+    public static function parse($json)
+    {
+        if (is_null($json)) {
+            throw new AvroProtocolParseException("Protocol can't be null");
+        }
+
+        $avro = json_decode($json, true);
+        if (!is_array($avro)) {
+            // json_decode() returns null for malformed input and a scalar for
+            // non-object documents; neither can describe a protocol.
+            throw new AvroProtocolParseException('Protocol is not a valid JSON object');
+        }
+
+        $protocol = new AvroProtocol();
+        $protocol->realParse($avro);
+        return $protocol;
+    }
+
+    /**
+     * Populates this instance from a decoded protocol declaration.
+     *
+     * @param array $avro decoded JSON (associative array) of the protocol
+     */
+    public function realParse($avro)
+    {
+        // Optional attributes are read with ?? so that a missing key does not
+        // raise an "undefined index" warning.
+        $this->protocol = $avro['protocol'] ?? null;
+        $this->namespace = $avro['namespace'] ?? null;
+        $this->schemata = new AvroNamedSchemata();
+        $this->name = $this->protocol;
+
+        if (isset($avro['types'])) {
+            // Called for its effect on $this->schemata, which is passed along
+            // to the schema parser; the return value is not needed here.
+            AvroSchema::realParse($avro['types'], $this->namespace, $this->schemata);
+        }
+
+        if (isset($avro['messages'])) {
+            foreach ($avro['messages'] as $messageName => $messageAvro) {
+                $this->messages[$messageName] = new AvroProtocolMessage($messageName, $messageAvro, $this);
+            }
+        }
+    }
+}
diff --git a/lang/php/lib/Protocol/AvroProtocolMessage.php b/lang/php/lib/Protocol/AvroProtocolMessage.php
new file mode 100644
index 0000000..7068efa
--- /dev/null
+++ b/lang/php/lib/Protocol/AvroProtocolMessage.php
@@ -0,0 +1,59 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Protocol;
+
+use Apache\Avro\Schema\AvroName;
+use Apache\Avro\Schema\AvroPrimitiveSchema;
+use Apache\Avro\Schema\AvroRecordSchema;
+use Apache\Avro\Schema\AvroSchema;
+
+/**
+ * A single message of an Avro protocol: a request record schema plus an
+ * optional response schema.
+ */
+class AvroProtocolMessage
+{
+    /**
+     * @var string name of this message
+     */
+    public $name;
+
+    /**
+     * @var AvroRecordSchema $request
+     */
+    public $request;
+
+    /**
+     * @var AvroSchema|null response schema; left unset when the message
+     *      declaration has no "response" attribute
+     */
+    public $response;
+
+    /**
+     * @param string $name message name
+     * @param array $avro decoded JSON declaration of the message
+     * @param AvroProtocol $protocol enclosing protocol; its namespace and
+     *        schemata are used to resolve names
+     */
+    public function __construct($name, $avro, $protocol)
+    {
+        $this->name = $name;
+        $this->request = new AvroRecordSchema(
+            new AvroName($name, null, $protocol->namespace),
+            null,
+            // A missing "request" is passed on as null instead of triggering
+            // an "undefined index" warning here.
+            $avro['request'] ?? null,
+            $protocol->schemata,
+            AvroSchema::REQUEST_SCHEMA
+        );
+
+        if (array_key_exists('response', $avro)) {
+            // First try to resolve the response as a named schema known to
+            // the protocol ...
+            $this->response = $protocol->schemata->schemaByName(new AvroName(
+                $avro['response'],
+                $protocol->namespace,
+                $protocol->namespace
+            ));
+            // ... and otherwise treat it as a primitive type name.
+            if (is_null($this->response)) {
+                $this->response = new AvroPrimitiveSchema($avro['response']);
+            }
+        }
+    }
+}
diff --git a/lang/php/test/IODatumReaderTest.php b/lang/php/lib/Protocol/AvroProtocolParseException.php
similarity index 64%
copy from lang/php/test/IODatumReaderTest.php
copy to lang/php/lib/Protocol/AvroProtocolParseException.php
index 7e1e7d9..8409c46 100644
--- a/lang/php/test/IODatumReaderTest.php
+++ b/lang/php/lib/Protocol/AvroProtocolParseException.php
@@ -1,4 +1,5 @@
<?php
+
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -17,20 +18,10 @@
* limitations under the License.
*/
-require_once('test_helper.php');
+namespace Apache\Avro\Protocol;
-class IODatumReaderTest extends PHPUnit\Framework\TestCase
-{
+use Apache\Avro\AvroException;
- public function testSchemaMatching()
- {
- $writers_schema = <<<JSON
- { "type": "map",
- "values": "bytes" }
-JSON;
- $readers_schema = $writers_schema;
- $this->assertTrue(AvroIODatumReader::schemas_match(
- AvroSchema::parse($writers_schema),
- AvroSchema::parse($readers_schema)));
- }
+class AvroProtocolParseException extends AvroException
+{
}
diff --git a/lang/php/lib/Schema/AvroArraySchema.php b/lang/php/lib/Schema/AvroArraySchema.php
new file mode 100644
index 0000000..2ba0839
--- /dev/null
+++ b/lang/php/lib/Schema/AvroArraySchema.php
@@ -0,0 +1,88 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+/**
+ * Avro array schema, consisting of items of a particular
+ * Avro schema type.
+ * @package Avro
+ */
+class AvroArraySchema extends AvroSchema
+{
+    /**
+     * @var AvroName|AvroSchema named schema name or AvroSchema of
+     *                          array element
+     */
+    private $items;
+
+    /**
+     * @var boolean true if the items schema was looked up by name in the
+     *              schemata passed to the constructor
+     * FIXME: couldn't we derive this from whether or not $this->items
+     *        is an AvroName or an AvroSchema?
+     */
+    private $isItemsSchemaFromSchemata = false;
+
+    /**
+     * @param string|mixed $items AvroNamedSchema name or object form
+     *        of decoded JSON schema representation.
+     * @param string $defaultNamespace namespace of enclosing schema
+     * @param AvroNamedSchemata &$schemata
+     */
+    public function __construct($items, $defaultNamespace, &$schemata = null)
+    {
+        parent::__construct(AvroSchema::ARRAY_SCHEMA);
+
+        $itemsSchema = null;
+        // $schemata defaults to null, so only attempt the by-name lookup when
+        // there is something to look the name up in.
+        if (is_string($items) && !is_null($schemata)) {
+            $itemsSchema = $schemata->schemaByName(
+                new AvroName($items, null, $defaultNamespace)
+            );
+        }
+
+        if ($itemsSchema) {
+            $this->isItemsSchemaFromSchemata = true;
+        } else {
+            $itemsSchema = AvroSchema::subparse($items, $defaultNamespace, $schemata);
+        }
+
+        $this->items = $itemsSchema;
+    }
+
+    /**
+     * @returns AvroName|AvroSchema named schema name or AvroSchema
+     *          of this array schema's elements.
+     */
+    public function items()
+    {
+        return $this->items;
+    }
+
+    /**
+     * @returns mixed
+     */
+    public function toAvro()
+    {
+        $avro = parent::toAvro();
+        // Schemas found by name are emitted as a name reference; all others
+        // are emitted in full.
+        $avro[AvroSchema::ITEMS_ATTR] = $this->isItemsSchemaFromSchemata
+            ? $this->items->qualifiedName()
+            : $this->items->toAvro();
+        return $avro;
+    }
+}
diff --git a/lang/php/lib/Schema/AvroEnumSchema.php b/lang/php/lib/Schema/AvroEnumSchema.php
new file mode 100644
index 0000000..4d3035f
--- /dev/null
+++ b/lang/php/lib/Schema/AvroEnumSchema.php
@@ -0,0 +1,119 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+use Apache\Avro\AvroException;
+use Apache\Avro\AvroUtil;
+
+/**
+ * @package Avro
+ */
+class AvroEnumSchema extends AvroNamedSchema
+{
+    /**
+     * @var string[] array of symbols
+     */
+    private $symbols;
+
+    /**
+     * @param AvroName $name
+     * @param string $doc
+     * @param string[] $symbols
+     * @param AvroNamedSchemata &$schemata
+     * @throws AvroSchemaParseException if $symbols is not a list, contains a
+     *         non-string or empty entry, or contains duplicates
+     */
+    public function __construct($name, $doc, $symbols, &$schemata = null)
+    {
+        if (!AvroUtil::isList($symbols)) {
+            throw new AvroSchemaParseException('Enum Schema symbols are not a list');
+        }
+
+        // Validate the element type first so the duplicate check below only
+        // ever compares strings.
+        foreach ($symbols as $symbol) {
+            if (!is_string($symbol) || empty($symbol)) {
+                throw new AvroSchemaParseException(
+                    sprintf('Enum schema symbol must be a string %s', print_r($symbol, true))
+                );
+            }
+        }
+
+        // array_unique() can only shrink the list, so duplicates exist exactly
+        // when the de-duplicated list is shorter than the original.
+        if (count(array_unique($symbols)) < count($symbols)) {
+            throw new AvroSchemaParseException(
+                sprintf('Duplicate symbols: %s', implode(', ', $symbols))
+            );
+        }
+
+        parent::__construct(AvroSchema::ENUM_SCHEMA, $name, $doc, $schemata);
+        $this->symbols = $symbols;
+    }
+
+    /**
+     * @returns string[] this enum schema's symbols
+     */
+    public function symbols()
+    {
+        return $this->symbols;
+    }
+
+    /**
+     * @param string $symbol
+     * @returns boolean true if the given symbol exists in this
+     *          enum schema and false otherwise
+     */
+    public function hasSymbol($symbol)
+    {
+        // Strict comparison, matching symbolIndex(): symbols are strings, so a
+        // loosely-equal non-string must not count as a member.
+        return in_array($symbol, $this->symbols, true);
+    }
+
+    /**
+     * @param int $index
+     * @returns string enum schema symbol with the given (zero-based) index
+     * @throws AvroException if there is no symbol at $index
+     */
+    public function symbolByIndex($index)
+    {
+        if (array_key_exists($index, $this->symbols)) {
+            return $this->symbols[$index];
+        }
+        throw new AvroException(sprintf('Invalid symbol index %d', $index));
+    }
+
+    /**
+     * @param string $symbol
+     * @returns int the index of the given $symbol in the enum schema
+     * @throws AvroException if $symbol is not one of this schema's symbols
+     */
+    public function symbolIndex($symbol)
+    {
+        $idx = array_search($symbol, $this->symbols, true);
+        if (false !== $idx) {
+            return $idx;
+        }
+        throw new AvroException(sprintf("Invalid symbol value '%s'", $symbol));
+    }
+
+    /**
+     * @returns mixed
+     */
+    public function toAvro()
+    {
+        $avro = parent::toAvro();
+        $avro[AvroSchema::SYMBOLS_ATTR] = $this->symbols;
+        return $avro;
+    }
+}
diff --git a/lang/php/lib/Schema/AvroField.php b/lang/php/lib/Schema/AvroField.php
new file mode 100644
index 0000000..9e63c1d
--- /dev/null
+++ b/lang/php/lib/Schema/AvroField.php
@@ -0,0 +1,189 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+/**
+ * Field of an {@link AvroRecordSchema}
+ * @package Avro
+ */
+class AvroField extends AvroSchema
+{
+    /**
+     * @var string fields name attribute name
+     */
+    public const FIELD_NAME_ATTR = 'name';
+
+    /**
+     * @var string
+     */
+    public const DEFAULT_ATTR = 'default';
+
+    /**
+     * @var string
+     */
+    public const ORDER_ATTR = 'order';
+
+    /**
+     * @var string
+     */
+    public const ASC_SORT_ORDER = 'ascending';
+
+    /**
+     * @var string
+     */
+    public const DESC_SORT_ORDER = 'descending';
+
+    /**
+     * @var string
+     */
+    public const IGNORE_SORT_ORDER = 'ignore';
+
+    /**
+     * @var array list of valid field sort order values
+     */
+    private static $validFieldSortOrders = [
+        self::ASC_SORT_ORDER,
+        self::DESC_SORT_ORDER,
+        self::IGNORE_SORT_ORDER,
+    ];
+
+    /**
+     * @var string
+     */
+    private $name;
+
+    /**
+     * @var boolean whether or not there is a default value
+     */
+    private $hasDefault;
+
+    /**
+     * @var mixed field default value; only assigned when $hasDefault is true
+     */
+    private $default;
+
+    /**
+     * @var string|null sort order of this field
+     */
+    private $order;
+
+    /**
+     * @var boolean whether or not the AvroNamedSchema of this field is
+     *              defined in the AvroNamedSchemata instance
+     */
+    private $isTypeFromSchemata;
+
+    /**
+     * @param string $name field name
+     * @param AvroSchema $schema schema of the field's value
+     * @param boolean $is_type_from_schemata
+     * @param boolean $has_default whether $default carries a declared default
+     * @param mixed $default default value (ignored unless $has_default)
+     * @param string|null $order one of the *_SORT_ORDER constants, or null
+     * @throws AvroSchemaParseException if $name is not well formed or
+     *         $order is not a valid sort order
+     * @todo Check validity of $default value
+     */
+    public function __construct(
+        $name,
+        $schema,
+        $is_type_from_schemata,
+        $has_default,
+        $default,
+        $order = null
+    ) {
+        if (!AvroName::isWellFormedName($name)) {
+            throw new AvroSchemaParseException('Field requires a "name" attribute');
+        }
+
+        $this->type = $schema;
+        $this->isTypeFromSchemata = $is_type_from_schemata;
+        $this->name = $name;
+        $this->hasDefault = $has_default;
+        if ($this->hasDefault) {
+            $this->default = $default;
+        }
+        self::checkOrderValue($order);
+        $this->order = $order;
+    }
+
+    /**
+     * @param string $order
+     * @throws AvroSchemaParseException if $order is not a valid
+     *         field order value.
+     */
+    private static function checkOrderValue($order)
+    {
+        if (!is_null($order) && !self::isValidFieldSortOrder($order)) {
+            throw new AvroSchemaParseException(
+                sprintf('Invalid field sort order %s', $order)
+            );
+        }
+    }
+
+    /**
+     * @param string $order
+     * @returns boolean
+     */
+    private static function isValidFieldSortOrder($order)
+    {
+        // Strict: a non-string such as 0 must not loosely match a sort order.
+        return in_array($order, self::$validFieldSortOrders, true);
+    }
+
+    /**
+     * @returns mixed
+     */
+    public function toAvro()
+    {
+        $avro = [AvroField::FIELD_NAME_ATTR => $this->name];
+
+        $avro[AvroSchema::TYPE_ATTR] = ($this->isTypeFromSchemata)
+            ? $this->type->qualifiedName() : $this->type->toAvro();
+
+        // Keyed on hasDefault rather than isset(): an explicit null default
+        // must still be emitted.
+        if ($this->hasDefault) {
+            $avro[AvroField::DEFAULT_ATTR] = $this->default;
+        }
+
+        if ($this->order) {
+            $avro[AvroField::ORDER_ATTR] = $this->order;
+        }
+
+        return $avro;
+    }
+
+    /**
+     * @returns string the name of this field
+     */
+    public function name()
+    {
+        return $this->name;
+    }
+
+    /**
+     * @returns mixed the default value of this field
+     */
+    public function defaultValue()
+    {
+        return $this->default;
+    }
+
+    /**
+     * @returns boolean true if the field has a default and false otherwise
+     */
+    public function hasDefaultValue()
+    {
+        return $this->hasDefault;
+    }
+}
diff --git a/lang/php/lib/Schema/AvroFixedSchema.php b/lang/php/lib/Schema/AvroFixedSchema.php
new file mode 100644
index 0000000..1091765
--- /dev/null
+++ b/lang/php/lib/Schema/AvroFixedSchema.php
@@ -0,0 +1,68 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+/**
+ * AvroNamedSchema with fixed-length data values
+ * @package Avro
+ */
+class AvroFixedSchema extends AvroNamedSchema
+{
+    /**
+     * @var int number of bytes in a value of this fixed schema
+     */
+    private $size;
+
+    /**
+     * @param AvroName $name
+     * @param string $doc accepted for signature parity but not forwarded:
+     *        null is always handed to the parent constructor
+     * @param int $size number of bytes in a value of this fixed schema
+     * @param AvroNamedSchemata &$schemata
+     * @throws AvroSchemaParseException if $size is not an integer
+     */
+    public function __construct($name, $doc, $size, &$schemata = null)
+    {
+        $sizeIsInteger = is_int($size);
+        if ($sizeIsInteger === false) {
+            $message = 'Fixed Schema requires a valid integer for "size" attribute';
+            throw new AvroSchemaParseException($message);
+        }
+
+        parent::__construct(AvroSchema::FIXED_SCHEMA, $name, null, $schemata);
+        $this->size = $size;
+    }
+
+    /**
+     * @returns int number of bytes in a value of this fixed schema
+     */
+    public function size()
+    {
+        return $this->size;
+    }
+
+    /**
+     * @returns mixed the parent representation plus the size attribute
+     */
+    public function toAvro()
+    {
+        $representation = parent::toAvro();
+        $representation[AvroSchema::SIZE_ATTR] = $this->size;
+
+        return $representation;
+    }
+}
diff --git a/lang/php/lib/Schema/AvroMapSchema.php b/lang/php/lib/Schema/AvroMapSchema.php
new file mode 100644
index 0000000..dfcb9da
--- /dev/null
+++ b/lang/php/lib/Schema/AvroMapSchema.php
@@ -0,0 +1,90 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+/**
+ * Avro map schema consisting of named values of defined
+ * Avro Schema types.
+ * @package Avro
+ */
+class AvroMapSchema extends AvroSchema
+{
+    /**
+     * @var string|AvroSchema named schema name or AvroSchema
+     *                        of map schema values.
+     */
+    private $values;
+
+    /**
+     * @var boolean true if the values schema was looked up by name in the
+     *              schemata passed to the constructor
+     * XXX Couldn't we derive this based on whether or not
+     *     $this->values is a string?
+     */
+    private $isValuesSchemaFromSchemata = false;
+
+    /**
+     * @param string|AvroSchema $values
+     * @param string $defaultNamespace namespace of enclosing schema
+     * @param AvroNamedSchemata &$schemata
+     */
+    public function __construct($values, $defaultNamespace, &$schemata = null)
+    {
+        parent::__construct(AvroSchema::MAP_SCHEMA);
+
+        $valuesSchema = null;
+        // $schemata defaults to null, so only attempt the by-name lookup when
+        // there is something to look the name up in.
+        if (is_string($values) && !is_null($schemata)) {
+            $valuesSchema = $schemata->schemaByName(
+                new AvroName($values, null, $defaultNamespace)
+            );
+        }
+
+        if ($valuesSchema) {
+            $this->isValuesSchemaFromSchemata = true;
+        } else {
+            $valuesSchema = AvroSchema::subparse(
+                $values,
+                $defaultNamespace,
+                $schemata
+            );
+        }
+
+        $this->values = $valuesSchema;
+    }
+
+    /**
+     * @returns AvroSchema schema of this map's values
+     */
+    public function values()
+    {
+        return $this->values;
+    }
+
+    /**
+     * @returns mixed
+     */
+    public function toAvro()
+    {
+        $avro = parent::toAvro();
+        // Schemas found by name are emitted as a name reference; all others
+        // are emitted in full.
+        $avro[AvroSchema::VALUES_ATTR] = $this->isValuesSchemaFromSchemata
+            ? $this->values->qualifiedName()
+            : $this->values->toAvro();
+        return $avro;
+    }
+}
diff --git a/lang/php/lib/Schema/AvroName.php b/lang/php/lib/Schema/AvroName.php
new file mode 100644
index 0000000..9dd38f0
--- /dev/null
+++ b/lang/php/lib/Schema/AvroName.php
@@ -0,0 +1,168 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+/**
+ * @package Avro
+ */
+class AvroName
+{
+    /**
+     * @var string separator between the components of a fullname
+     */
+    const NAME_SEPARATOR = '.';
+
+    /**
+     * @var string pattern every individual name component must match
+     */
+    const NAME_REGEXP = '/^[A-Za-z_][A-Za-z0-9_]*$/';
+
+    /**
+     * @var string last component of the fullname
+     */
+    private $name;
+
+    /**
+     * @var string|null everything before the last separator of the fullname
+     */
+    private $namespace;
+
+    /**
+     * @var string
+     */
+    private $fullname;
+
+    /**
+     * @var string the bare name when the namespace is absent or equals the
+     *             default namespace, the fullname otherwise
+     */
+    private $qualified_name;
+
+    /**
+     * @param string $name bare name, or a dotted fullname
+     * @param string $namespace explicit namespace (ignored for dotted names)
+     * @param string $default_namespace namespace of the enclosing context
+     * @throws AvroSchemaParseException if $name is empty, not a string, or
+     *         contains an invalid component
+     */
+    public function __construct($name, $namespace, $default_namespace)
+    {
+        if (!is_string($name) || empty($name)) {
+            throw new AvroSchemaParseException('Name must be a non-empty string.');
+        }
+
+        // Only a separator found past offset 0 marks $name as a fullname.
+        $separatorAt = strpos($name, self::NAME_SEPARATOR);
+        $hasInnerSeparator = ($separatorAt !== false && $separatorAt !== 0);
+
+        if ($hasInnerSeparator && self::checkNamespaceNames($name)) {
+            $resolved = $name;
+        } elseif (0 === preg_match(self::NAME_REGEXP, $name)) {
+            throw new AvroSchemaParseException(sprintf('Invalid name "%s"', $name));
+        } else {
+            // The explicit namespace wins over the default one.
+            $enclosing = !is_null($namespace) ? $namespace : $default_namespace;
+            $resolved = is_null($enclosing)
+                ? $name
+                : self::parseFullname($name, $enclosing);
+        }
+        $this->fullname = $resolved;
+
+        $pieces = self::extractNamespace($this->fullname);
+        $this->name = $pieces[0];
+        $this->namespace = $pieces[1];
+
+        $needsQualification = !is_null($this->namespace)
+            && $this->namespace !== $default_namespace;
+        $this->qualified_name = $needsQualification ? $this->fullname : $this->name;
+    }
+
+    /**
+     * @param string $namespace dotted sequence of names
+     * @returns boolean true when every component is a valid name
+     * @throws AvroSchemaParseException on the first invalid component
+     */
+    private static function checkNamespaceNames($namespace)
+    {
+        $components = explode(self::NAME_SEPARATOR, $namespace);
+        foreach ($components as $component) {
+            $isValid = !empty($component)
+                && (0 !== preg_match(self::NAME_REGEXP, $component));
+            if (!$isValid) {
+                throw new AvroSchemaParseException(sprintf('Invalid name "%s"', $component));
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * @param string $name
+     * @param string $namespace
+     * @returns string $namespace and $name joined by a dot
+     * @throws AvroSchemaParseException if the namespace is empty, not a
+     *         string, or contains an invalid component
+     */
+    private static function parseFullname($name, $namespace)
+    {
+        if (!is_string($namespace) || empty($namespace)) {
+            throw new AvroSchemaParseException('Namespace must be a non-empty string.');
+        }
+
+        self::checkNamespaceNames($namespace);
+
+        return $namespace . '.' . $name;
+    }
+
+    /**
+     * Splits a dotted name at its last separator; a name without a
+     * separator is returned together with the $namespace that was passed in.
+     *
+     * @returns string[] array($name, $namespace)
+     */
+    public static function extractNamespace($name, $namespace = null)
+    {
+        $segments = explode(self::NAME_SEPARATOR, $name);
+        if (count($segments) <= 1) {
+            return [$name, $namespace];
+        }
+
+        $bareName = array_pop($segments);
+
+        return [$bareName, implode(self::NAME_SEPARATOR, $segments)];
+    }
+
+    /**
+     * @returns boolean true if the given name is a non-empty string
+     *          matching self::NAME_REGEXP and false otherwise
+     */
+    public static function isWellFormedName($name)
+    {
+        if (!is_string($name) || empty($name)) {
+            return false;
+        }
+
+        return (bool) preg_match(self::NAME_REGEXP, $name);
+    }
+
+    /**
+     * @returns array array($name, $namespace)
+     */
+    public function nameAndNamespace()
+    {
+        return [$this->name, $this->namespace];
+    }
+
+    /**
+     * @returns string fullname
+     * @uses $this->fullname()
+     */
+    public function __toString()
+    {
+        return $this->fullname();
+    }
+
+    /**
+     * @returns string
+     */
+    public function fullname()
+    {
+        return $this->fullname;
+    }
+
+    /**
+     * @returns string name qualified for its context
+     */
+    public function qualifiedName()
+    {
+        return $this->qualified_name;
+    }
+}
diff --git a/lang/php/lib/Schema/AvroNamedSchema.php b/lang/php/lib/Schema/AvroNamedSchema.php
new file mode 100644
index 0000000..ef7fed1
--- /dev/null
+++ b/lang/php/lib/Schema/AvroNamedSchema.php
@@ -0,0 +1,92 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+/**
+ * Parent class of named Avro schema
+ * @package Avro
+ * @todo Refactor AvroNamedSchema to use an AvroName instance
+ * to store name information.
+ */
+class AvroNamedSchema extends AvroSchema
+{
+    /**
+     * @var AvroName $name
+     */
+    private $name;
+
+    /**
+     * @var string|null documentation string
+     */
+    private $doc;
+
+    /**
+     * @param string $type
+     * @param AvroName $name
+     * @param string $doc documentation string
+     * @param AvroNamedSchemata &$schemata when given, replaced by a copy that
+     *        additionally contains this schema
+     * @throws AvroSchemaParseException if $doc is truthy but not a string
+     */
+    public function __construct($type, $name, $doc = null, &$schemata = null)
+    {
+        parent::__construct($type);
+        $this->name = $name;
+
+        $docIsInvalid = $doc && !is_string($doc);
+        if ($docIsInvalid) {
+            throw new AvroSchemaParseException('Schema doc attribute must be a string');
+        }
+        $this->doc = $doc;
+
+        if (is_null($schemata)) {
+            return;
+        }
+
+        // The by-reference argument is swapped for the extended copy.
+        $schemata = $schemata->cloneWithNewSchema($this);
+    }
+
+    /**
+     * @returns mixed parent representation plus name, and namespace/doc
+     *          when present
+     */
+    public function toAvro()
+    {
+        $avro = parent::toAvro();
+
+        $parts = AvroName::extractNamespace($this->qualifiedName());
+        $avro[AvroSchema::NAME_ATTR] = $parts[0];
+        if ($parts[1]) {
+            $avro[AvroSchema::NAMESPACE_ATTR] = $parts[1];
+        }
+
+        if ($this->doc !== null) {
+            $avro[AvroSchema::DOC_ATTR] = $this->doc;
+        }
+
+        return $avro;
+    }
+
+    /**
+     * @returns string qualified name as reported by the AvroName
+     */
+    public function qualifiedName()
+    {
+        return $this->name->qualifiedName();
+    }
+
+    /**
+     * @returns string
+     */
+    public function fullname()
+    {
+        return $this->name->fullname();
+    }
+}
diff --git a/lang/php/lib/Schema/AvroNamedSchemata.php b/lang/php/lib/Schema/AvroNamedSchemata.php
new file mode 100644
index 0000000..1e7a88b
--- /dev/null
+++ b/lang/php/lib/Schema/AvroNamedSchemata.php
@@ -0,0 +1,103 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+/**
+ * Keeps track of AvroNamedSchema which have been observed so far,
+ * as well as the default namespace.
+ *
+ * @package Avro
+ */
+class AvroNamedSchemata
+{
+    /**
+     * @var AvroNamedSchema[] schemas keyed by their fullname
+     */
+    private $schemata;
+
+    /**
+     * @param AvroNamedSchema[] $schemata schemas keyed by their fullname
+     */
+    public function __construct($schemata = [])
+    {
+        $this->schemata = $schemata;
+    }
+
+    /**
+     * Dumps the stored schemas to standard output.
+     */
+    public function listSchemas()
+    {
+        var_export($this->schemata);
+        foreach ($this->schemata as $knownSchema) {
+            echo 'Schema ' . $knownSchema->__toString() . "\n";
+        }
+    }
+
+    /**
+     * @param AvroName $name
+     * @returns AvroSchema|null
+     */
+    public function schemaByName($name)
+    {
+        $fullname = $name->fullname();
+
+        return $this->schema($fullname);
+    }
+
+    /**
+     * @param string $fullname
+     * @returns AvroSchema|null the schema stored under the given name,
+     *          or null if there is none.
+     */
+    public function schema($fullname)
+    {
+        return $this->schemata[$fullname] ?? null;
+    }
+
+    /**
+     * Returns a new AvroNamedSchemata holding this instance's schemas plus
+     * the given $schema; this instance is left untouched.
+     * @param AvroNamedSchema schema to add to this existing schemata
+     * @returns AvroNamedSchemata
+     * @throws AvroSchemaParseException if the schema's fullname is a
+     *         reserved type name or is already registered
+     */
+    public function cloneWithNewSchema($schema)
+    {
+        $fullname = $schema->fullname();
+
+        if (AvroSchema::isValidType($fullname)) {
+            throw new AvroSchemaParseException(sprintf('Name "%s" is a reserved type name', $fullname));
+        }
+
+        if ($this->hasName($fullname)) {
+            throw new AvroSchemaParseException(sprintf('Name "%s" is already in use', $fullname));
+        }
+
+        $extended = $this->schemata;
+        $extended[$fullname] = $schema;
+
+        return new self($extended);
+    }
+
+    /**
+     * @param string $fullname
+     * @returns boolean true if a schema is registered under the given name
+     *          and false otherwise.
+     */
+    public function hasName($fullname)
+    {
+        return array_key_exists($fullname, $this->schemata);
+    }
+}
diff --git a/lang/php/lib/Schema/AvroPrimitiveSchema.php b/lang/php/lib/Schema/AvroPrimitiveSchema.php
new file mode 100644
index 0000000..94686d8
--- /dev/null
+++ b/lang/php/lib/Schema/AvroPrimitiveSchema.php
@@ -0,0 +1,54 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+/**
+ * Avro schema for basic types such as null, int, long, string.
+ * @package Avro
+ */
+class AvroPrimitiveSchema extends AvroSchema
+{
+    /**
+     * @param string $type name of the primitive schema type
+     * @throws AvroSchemaParseException when $type does not name a
+     *         primitive schema type
+     */
+    public function __construct($type)
+    {
+        $isPrimitive = self::isPrimitiveType($type);
+        if (!$isPrimitive) {
+            $message = sprintf('%s is not a valid primitive type.', $type);
+            throw new AvroSchemaParseException($message);
+        }
+
+        parent::__construct($type);
+    }
+
+    /**
+     * @returns mixed the bare type when the parent representation has
+     *          exactly one entry, the parent representation otherwise
+     */
+    public function toAvro()
+    {
+        $representation = parent::toAvro();
+
+        // FIXME: Is this check really necessary? When *wouldn't* this be the case?
+        return (count($representation) == 1) ? $this->type : $representation;
+    }
+}
diff --git a/lang/php/lib/Schema/AvroRecordSchema.php b/lang/php/lib/Schema/AvroRecordSchema.php
new file mode 100644
index 0000000..b3b2404
--- /dev/null
+++ b/lang/php/lib/Schema/AvroRecordSchema.php
@@ -0,0 +1,173 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+use Apache\Avro\AvroUtil;
+
+/**
+ * Avro schema made of named fields. Used for the record and error schema
+ * types as well as for request schemas.
+ * @package Avro
+ */
+class AvroRecordSchema extends AvroNamedSchema
+{
+    /**
+     * @var AvroField[] field definitions of this AvroRecordSchema,
+     *      in declaration order
+     */
+    private $fields;
+
+    /**
+     * @var array|null map of field names to AvroField objects.
+     * @internal Not accessed directly. Memoization of AvroRecordSchema->fieldsHash()
+     */
+    private $fieldsHash;
+
+    /**
+     * @param AvroName $name name of this schema; its namespace is used as the
+     *        default namespace while parsing the fields
+     * @param string|null $doc documentation string
+     * @param array $fields JSON-decoded list of field definitions
+     * @param AvroNamedSchemata|null &$schemata named schemas known so far
+     * @param string $schema_type schema type name
+     * @throws AvroSchemaParseException if $fields is null or cannot be parsed
+     */
+    public function __construct(
+        $name,
+        $doc,
+        $fields,
+        &$schemata = null,
+        $schema_type = AvroSchema::RECORD_SCHEMA
+    ) {
+        if (is_null($fields)) {
+            throw new AvroSchemaParseException(
+                'Record schema requires a non-empty fields attribute'
+            );
+        }
+
+        // Request schemas are built without doc and without the shared schemata.
+        if (AvroSchema::REQUEST_SCHEMA == $schema_type) {
+            parent::__construct($schema_type, $name);
+        } else {
+            parent::__construct($schema_type, $name, $doc, $schemata);
+        }
+
+        // Only the namespace half of the pair is needed here.
+        [, $namespace] = $name->nameAndNamespace();
+        $this->fields = self::parseFields($fields, $namespace, $schemata);
+    }
+
+    /**
+     * Turns JSON-decoded field definitions into AvroField objects.
+     *
+     * @param mixed $field_data JSON-decoded list of field definitions
+     * @param string $default_namespace namespace of enclosing schema
+     * @param AvroNamedSchemata|null &$schemata named schemas known so far;
+     *        a fresh collection is created when null is given
+     * @returns AvroField[]
+     * @throws AvroSchemaParseException if the definitions are not a list of
+     *         arrays, a field name is used twice, or a field type is invalid
+     */
+    public static function parseFields($field_data, $default_namespace, &$schemata)
+    {
+        if (!is_array($field_data)) {
+            throw new AvroSchemaParseException(
+                'Record schema fields attribute must be a list of field definitions'
+            );
+        }
+
+        // The constructor allows $schemata to be null; without this guard the
+        // schemaByName() lookup below would be a call on null.
+        if (is_null($schemata)) {
+            $schemata = new AvroNamedSchemata();
+        }
+
+        $fields = array();
+        $field_names = array();
+        foreach ($field_data as $field) {
+            if (!is_array($field)) {
+                throw new AvroSchemaParseException(
+                    sprintf('Invalid field definition: %s', print_r($field, true))
+                );
+            }
+
+            $name = AvroUtil::arrayValue($field, AvroField::FIELD_NAME_ATTR);
+            $type = AvroUtil::arrayValue($field, AvroSchema::TYPE_ATTR);
+            $order = AvroUtil::arrayValue($field, AvroField::ORDER_ATTR);
+
+            // array_key_exists() rather than isset(): an explicit null default
+            // still counts as "has a default".
+            $has_default = array_key_exists(AvroField::DEFAULT_ATTR, $field);
+            $default = $has_default ? $field[AvroField::DEFAULT_ATTR] : null;
+
+            // Strict comparison so that numeric-looking names such as "1e1"
+            // and "10" are not reported as duplicates of each other.
+            if (in_array($name, $field_names, true)) {
+                throw new AvroSchemaParseException(
+                    sprintf("Field name %s is already in use", $name)
+                );
+            }
+
+            // A string type may refer to a named schema that is already known;
+            // anything else (or an unknown name) is parsed as a nested schema.
+            $field_schema = is_string($type)
+                ? $schemata->schemaByName(new AvroName($type, null, $default_namespace))
+                : null;
+            $is_schema_from_schemata = (bool) $field_schema;
+            if (!$is_schema_from_schemata) {
+                $field_schema = self::subparse($type, $default_namespace, $schemata);
+            }
+
+            $fields[] = new AvroField(
+                $name,
+                $field_schema,
+                $is_schema_from_schemata,
+                $has_default,
+                $default,
+                $order
+            );
+            $field_names[] = $name;
+        }
+
+        return $fields;
+    }
+
+    /**
+     * @returns mixed the list of field representations for a request schema,
+     *          otherwise the parent representation extended with the fields
+     */
+    public function toAvro()
+    {
+        $avro = parent::toAvro();
+
+        $fields_avro = array();
+        foreach ($this->fields as $field) {
+            $fields_avro[] = $field->toAvro();
+        }
+
+        if (AvroSchema::REQUEST_SCHEMA == $this->type) {
+            return $fields_avro;
+        }
+
+        $avro[AvroSchema::FIELDS_ATTR] = $fields_avro;
+
+        return $avro;
+    }
+
+    /**
+     * @returns AvroField[] the field definitions of this AvroRecordSchema
+     */
+    public function fields()
+    {
+        return $this->fields;
+    }
+
+    /**
+     * @returns array a hash table of the fields of this AvroRecordSchema
+     *          keyed by each field's name (built on first use, then reused)
+     */
+    public function fieldsHash()
+    {
+        if (is_null($this->fieldsHash)) {
+            $hash = array();
+            foreach ($this->fields as $field) {
+                $hash[$field->name()] = $field;
+            }
+            $this->fieldsHash = $hash;
+        }
+
+        return $this->fieldsHash;
+    }
+}
diff --git a/lang/php/lib/Schema/AvroSchema.php b/lang/php/lib/Schema/AvroSchema.php
new file mode 100644
index 0000000..7b43922
--- /dev/null
+++ b/lang/php/lib/Schema/AvroSchema.php
@@ -0,0 +1,556 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+use Apache\Avro\AvroUtil;
+
+/** TODO
+ * - ARRAY has only type and items attributes (what about metadata?)
+ * - MAP keys are (assumed?) to be strings
+ * - FIXED size must be integer (must be positive? less than MAXINT?)
+ * - primitive type names cannot have a namespace (so throw an error? or ignore?)
+ * - schema may contain multiple definitions of a named schema
+ * if definitions are equivalent (?)
+ * - Cleanup default namespace and named schemata handling.
+ * - For one, it appears to be *too* global. According to the spec,
+ * we should only be referencing schemas that are named within the
+ * *enclosing* schema, so those in sibling schemas (say, unions or fields)
+ * shouldn't be referenced, if I understand the spec correctly.
+ * - Also, if a named schema is defined more than once in the same schema,
+ * it must have the same definition: so it appears we *do* need to keep
+ * track of named schemata globally as well. (And does this play well
+ * with the requirements regarding enclosing schema?)
+ * - default values for bytes and fixed fields are JSON strings,
+ * where unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255
+ * - make sure other default values for other schema are of appropriate type
+ * - Should AvroField really be an AvroSchema object? Avro Fields have a name
+ * attribute, but not a namespace attribute (and the name can't be namespace
+ * qualified). It also has additional attributes such as doc, which named schemas
+ * enum and record have (though not fixed schemas, which also have names), and
+ * fields also have default and order attributes, shared by no other schema type.
+ */
+
+/**
+ * @package Avro
+ */
+class AvroSchema
+{
+ /**
+ * @var int lower bound of integer values: -(1 << 31)
+ */
+ const INT_MIN_VALUE = -2147483648;
+
+ /**
+ * @var int upper bound of integer values: (1 << 31) - 1
+ */
+ const INT_MAX_VALUE = 2147483647;
+
+ /**
+ * Lower bound of long values: -(1 << 63).
+ * NOTE(review): PHP reads this literal as unary minus applied to
+ * 9223372036854775808, which is beyond the int range, so on 64-bit builds
+ * the constant presumably ends up as a float rather than an int - confirm
+ * before relying on its type.
+ * @var int|float
+ */
+ const LONG_MIN_VALUE = -9223372036854775808;
+
+ /**
+ * @var int upper bound of long values: (1 << 63) - 1
+ */
+ const LONG_MAX_VALUE = 9223372036854775807;
+
+ // Primitive Types
+ /**
+ * @var string null schema type name
+ */
+ const NULL_TYPE = 'null';
+
+ /**
+ * @var string boolean schema type name
+ */
+ const BOOLEAN_TYPE = 'boolean';
+
+ /**
+ * int schema type value is a 32-bit signed int
+ * @var string int schema type name.
+ */
+ const INT_TYPE = 'int';
+
+ /**
+ * long schema type value is a 64-bit signed int
+ * @var string long schema type name
+ */
+ const LONG_TYPE = 'long';
+
+ /**
+ * float schema type value is a 32-bit IEEE 754 floating-point number
+ * @var string float schema type name
+ */
+ const FLOAT_TYPE = 'float';
+
+ /**
+ * double schema type value is a 64-bit IEEE 754 floating-point number
+ * @var string double schema type name
+ */
+ const DOUBLE_TYPE = 'double';
+
+ /**
+ * string schema type value is a Unicode character sequence
+ * @var string string schema type name
+ */
+ const STRING_TYPE = 'string';
+
+ /**
+ * bytes schema type value is a sequence of 8-bit unsigned bytes
+ * @var string bytes schema type name
+ */
+ const BYTES_TYPE = 'bytes';
+
+ // Complex Types
+ // Unnamed Schema
+ /**
+ * @var string array schema type name
+ */
+ const ARRAY_SCHEMA = 'array';
+
+ /**
+ * @var string map schema type name
+ */
+ const MAP_SCHEMA = 'map';
+
+ /**
+ * @var string union schema type name
+ */
+ const UNION_SCHEMA = 'union';
+
+ /**
+ * Unions of error schemas are used by Avro messages
+ * @var string error_union schema type name
+ */
+ const ERROR_UNION_SCHEMA = 'error_union';
+
+ // Named Schema
+
+ /**
+ * @var string enum schema type name
+ */
+ const ENUM_SCHEMA = 'enum';
+
+ /**
+ * @var string fixed schema type name
+ */
+ const FIXED_SCHEMA = 'fixed';
+
+ /**
+ * @var string record schema type name
+ */
+ const RECORD_SCHEMA = 'record';
+ // Other Schema (the comment applies to the constants that follow)
+
+ /**
+ * @var string error schema type name
+ */
+ const ERROR_SCHEMA = 'error';
+
+ /**
+ * @var string request schema type name
+ */
+ const REQUEST_SCHEMA = 'request';
+
+
+ // Schema attribute names
+ /**
+ * @var string schema type name attribute name
+ */
+ const TYPE_ATTR = 'type';
+
+ /**
+ * @var string named schema name attribute name
+ */
+ const NAME_ATTR = 'name';
+
+ /**
+ * @var string named schema namespace attribute name
+ */
+ const NAMESPACE_ATTR = 'namespace';
+
+ /**
+ * @var string derived attribute: doesn't appear in schema
+ */
+ const FULLNAME_ATTR = 'fullname';
+
+ /**
+ * @var string fixed schema size attribute name
+ */
+ const SIZE_ATTR = 'size';
+
+ /**
+ * @var string record fields attribute name
+ */
+ const FIELDS_ATTR = 'fields';
+
+ /**
+ * @var string array schema items attribute name
+ */
+ const ITEMS_ATTR = 'items';
+
+ /**
+ * @var string enum schema symbols attribute name
+ */
+ const SYMBOLS_ATTR = 'symbols';
+
+ /**
+ * @var string map schema values attribute name
+ */
+ const VALUES_ATTR = 'values';
+
+ /**
+ * @var string document string attribute name
+ */
+ const DOC_ATTR = 'doc';
+
+ /**
+ * Consulted by isPrimitiveType().
+ * @var array list of primitive schema type names
+ */
+ private static $primitiveTypes = array(
+ self::NULL_TYPE,
+ self::BOOLEAN_TYPE,
+ self::STRING_TYPE,
+ self::BYTES_TYPE,
+ self::INT_TYPE,
+ self::LONG_TYPE,
+ self::FLOAT_TYPE,
+ self::DOUBLE_TYPE
+ );
+
+ /**
+ * Consulted by isNamedType().
+ * @var array list of named schema type names
+ */
+ private static $namedTypes = array(
+ self::FIXED_SCHEMA,
+ self::ENUM_SCHEMA,
+ self::RECORD_SCHEMA,
+ self::ERROR_SCHEMA
+ );
+ /**
+ * NOTE(review): this private list is not referenced by any method of this
+ * class - confirm whether it is still needed.
+ * @var array list of names of reserved attributes
+ */
+ private static $reservedAttrs = array(
+ self::TYPE_ATTR,
+ self::NAME_ATTR,
+ self::NAMESPACE_ATTR,
+ self::FIELDS_ATTR,
+ self::ITEMS_ATTR,
+ self::SIZE_ATTR,
+ self::SYMBOLS_ATTR,
+ self::VALUES_ATTR
+ );
+
+    /**
+     * Declared explicitly so that the constructor does not create a dynamic
+     * property (deprecated as of PHP 8.2). It stays public because an
+     * undeclared property was publicly readable as well.
+     * @var string schema type name of this schema
+     */
+    public $type;
+
+    /**
+     * @param string $type a schema type name
+     * @internal Should only be called from within the constructor of
+     *           a class which extends AvroSchema
+     */
+    public function __construct($type)
+    {
+        $this->type = $type;
+    }
+
+    /**
+     * Parses a JSON-encoded schema into its AvroSchema object.
+     *
+     * @param string $json JSON-encoded schema
+     * @uses self::realParse()
+     * @returns AvroSchema
+     * @throws AvroSchemaParseException if $json is not valid JSON or does not
+     *         describe a known schema
+     */
+    public static function parse($json)
+    {
+        $avro = json_decode($json, true);
+
+        // json_decode() reports failure by returning null, which would surface
+        // later as an unhelpful "is not a schema we know about" with an empty
+        // value. Report the actual JSON problem instead.
+        if (null === $avro && JSON_ERROR_NONE !== json_last_error()) {
+            throw new AvroSchemaParseException(
+                sprintf('Schema is not valid JSON: %s', json_last_error_msg())
+            );
+        }
+
+        $schemata = new AvroNamedSchemata();
+        return self::realParse($avro, null, $schemata);
+    }
+
+    /**
+     * Builds the schema object for an already JSON-decoded schema.
+     *
+     * Arrays carrying a "type" attribute are dispatched on that type, lists
+     * without a "type" key become unions, and bare primitive type names
+     * become primitive schemas.
+     *
+     * @param mixed $avro JSON-decoded schema
+     * @param string $default_namespace namespace of enclosing schema
+     * @param AvroNamedSchemata &$schemata reference to named schemas;
+     *        a fresh collection is created when null is given
+     * @returns AvroSchema
+     * @throws AvroSchemaParseException
+     */
+    public static function realParse($avro, $default_namespace = null, &$schemata = null)
+    {
+        if (is_null($schemata)) {
+            $schemata = new AvroNamedSchemata();
+        }
+
+        if (is_array($avro)) {
+            $type = AvroUtil::arrayValue($avro, self::TYPE_ATTR);
+
+            if (self::isPrimitiveType($type)) {
+                return new AvroPrimitiveSchema($type);
+            } elseif (self::isNamedType($type)) {
+                $name = AvroUtil::arrayValue($avro, self::NAME_ATTR);
+                $namespace = AvroUtil::arrayValue($avro, self::NAMESPACE_ATTR);
+                $new_name = new AvroName($name, $namespace, $default_namespace);
+                $doc = AvroUtil::arrayValue($avro, self::DOC_ATTR);
+                switch ($type) {
+                    case self::FIXED_SCHEMA:
+                        $size = AvroUtil::arrayValue($avro, self::SIZE_ATTR);
+                        return new AvroFixedSchema(
+                            $new_name,
+                            $doc,
+                            $size,
+                            $schemata
+                        );
+                    case self::ENUM_SCHEMA:
+                        $symbols = AvroUtil::arrayValue($avro, self::SYMBOLS_ATTR);
+                        return new AvroEnumSchema(
+                            $new_name,
+                            $doc,
+                            $symbols,
+                            $schemata
+                        );
+                    case self::RECORD_SCHEMA:
+                    case self::ERROR_SCHEMA:
+                        $fields = AvroUtil::arrayValue($avro, self::FIELDS_ATTR);
+                        return new AvroRecordSchema(
+                            $new_name,
+                            $doc,
+                            $fields,
+                            $schemata,
+                            $type
+                        );
+                    default:
+                        throw new AvroSchemaParseException(
+                            sprintf('Unknown named type: %s', $type)
+                        );
+                }
+            } elseif (self::isValidType($type)) {
+                switch ($type) {
+                    case self::ARRAY_SCHEMA:
+                        // Read through arrayValue() like every other attribute
+                        // so that a missing "items" key does not raise an
+                        // undefined-index warning before the sub-schema is rejected.
+                        return new AvroArraySchema(
+                            AvroUtil::arrayValue($avro, self::ITEMS_ATTR),
+                            $default_namespace,
+                            $schemata
+                        );
+                    case self::MAP_SCHEMA:
+                        return new AvroMapSchema(
+                            AvroUtil::arrayValue($avro, self::VALUES_ATTR),
+                            $default_namespace,
+                            $schemata
+                        );
+                    default:
+                        throw new AvroSchemaParseException(
+                            sprintf('Unknown valid type: %s', $type)
+                        );
+                }
+            } elseif (
+                !array_key_exists(self::TYPE_ATTR, $avro)
+                && AvroUtil::isList($avro)
+            ) {
+                return new AvroUnionSchema($avro, $default_namespace, $schemata);
+            } else {
+                // $type may itself be an array here; print_r() renders it
+                // without an "Array to string conversion" warning.
+                throw new AvroSchemaParseException(sprintf(
+                    'Undefined type: %s',
+                    print_r($type, true)
+                ));
+            }
+        } elseif (self::isPrimitiveType($avro)) {
+            return new AvroPrimitiveSchema($avro);
+        } else {
+            throw new AvroSchemaParseException(
+                sprintf(
+                    '%s is not a schema we know about.',
+                    print_r($avro, true)
+                )
+            );
+        }
+    }
+
+    /**
+     * @param string $type a schema type name
+     * @returns boolean true if the given type name is a valid schema type
+     *          name and false otherwise.
+     */
+    public static function isValidType($type)
+    {
+        if (self::isPrimitiveType($type) || self::isNamedType($type)) {
+            return true;
+        }
+
+        // Strict comparison: with a loose in_array() the boolean true (and,
+        // before PHP 8, the integer 0) compares equal to any type name.
+        return in_array(
+            $type,
+            array(
+                self::ARRAY_SCHEMA,
+                self::MAP_SCHEMA,
+                self::UNION_SCHEMA,
+                self::REQUEST_SCHEMA,
+                self::ERROR_UNION_SCHEMA
+            ),
+            true
+        );
+    }
+
+    /**
+     * @param string $type a schema type name
+     * @returns boolean true if the given type name is a primitive schema type
+     *          name and false otherwise.
+     */
+    public static function isPrimitiveType($type)
+    {
+        // Strict comparison: with a loose in_array() the boolean true (and,
+        // before PHP 8, the integer 0) compares equal to any type name, so a
+        // JSON document consisting of just "true" was accepted as a schema.
+        return in_array($type, self::$primitiveTypes, true);
+    }
+
+    /**
+     * @param string $type a schema type name
+     * @returns boolean true if the given type name is a named schema type name
+     *          and false otherwise.
+     */
+    public static function isNamedType($type)
+    {
+        // Strict comparison: with a loose in_array() the boolean true (and,
+        // before PHP 8, the integer 0) compares equal to any type name.
+        return in_array($type, self::$namedTypes, true);
+    }
+
+    /**
+     * Checks, recursively for container types, whether a PHP value can be
+     * written with the given schema.
+     *
+     * @param AvroSchema $expected_schema schema the datum is checked against
+     * @param mixed $datum value to check
+     * @returns boolean true if $datum is valid for $expected_schema
+     *          and false otherwise.
+     * @throws AvroSchemaParseException if the schema type is not handled here
+     */
+    public static function isValidDatum($expected_schema, $datum)
+    {
+        switch ($expected_schema->type) {
+            case self::NULL_TYPE:
+                return is_null($datum);
+            case self::BOOLEAN_TYPE:
+                return is_bool($datum);
+            case self::STRING_TYPE:
+            case self::BYTES_TYPE:
+                return is_string($datum);
+            case self::INT_TYPE:
+                return (is_int($datum)
+                    && (self::INT_MIN_VALUE <= $datum)
+                    && ($datum <= self::INT_MAX_VALUE));
+            case self::LONG_TYPE:
+                return (is_int($datum)
+                    && (self::LONG_MIN_VALUE <= $datum)
+                    && ($datum <= self::LONG_MAX_VALUE));
+            case self::FLOAT_TYPE:
+            case self::DOUBLE_TYPE:
+                return (is_float($datum) || is_int($datum));
+            case self::ARRAY_SCHEMA:
+                if (is_array($datum)) {
+                    foreach ($datum as $d) {
+                        if (!self::isValidDatum($expected_schema->items(), $d)) {
+                            return false;
+                        }
+                    }
+                    return true;
+                }
+                return false;
+            case self::MAP_SCHEMA:
+                if (is_array($datum)) {
+                    // NOTE: PHP stores integer-like string keys ("1") as ints,
+                    // so such keys are rejected by the is_string() check.
+                    foreach ($datum as $k => $v) {
+                        if (
+                            !is_string($k)
+                            || !self::isValidDatum($expected_schema->values(), $v)
+                        ) {
+                            return false;
+                        }
+                    }
+                    return true;
+                }
+                return false;
+            case self::UNION_SCHEMA:
+                // Valid as soon as one branch of the union accepts the datum.
+                foreach ($expected_schema->schemas() as $schema) {
+                    if (self::isValidDatum($schema, $datum)) {
+                        return true;
+                    }
+                }
+                return false;
+            case self::ENUM_SCHEMA:
+                // Strict comparison: with a loose in_array() the boolean true
+                // (and, before PHP 8, the integer 0) matched any symbol.
+                return in_array($datum, $expected_schema->symbols(), true);
+            case self::FIXED_SCHEMA:
+                return (is_string($datum)
+                    && (strlen($datum) == $expected_schema->size()));
+            case self::RECORD_SCHEMA:
+            case self::ERROR_SCHEMA:
+            case self::REQUEST_SCHEMA:
+                if (is_array($datum)) {
+                    // Every declared field must be present and itself valid.
+                    foreach ($expected_schema->fields() as $field) {
+                        if (
+                            !array_key_exists($field->name(), $datum)
+                            || !self::isValidDatum($field->type(), $datum[$field->name()])
+                        ) {
+                            return false;
+                        }
+                    }
+                    return true;
+                }
+                return false;
+            default:
+                throw new AvroSchemaParseException(sprintf('%s is not allowed.', $expected_schema));
+        }
+    }
+
+    /**
+     * Parses a nested schema, reporting every failure as an
+     * AvroSchemaParseException.
+     *
+     * @param mixed $avro JSON-decoded sub-schema
+     * @param string $default_namespace namespace of enclosing schema
+     * @param AvroNamedSchemata &$schemata
+     * @returns AvroSchema
+     * @throws AvroSchemaParseException
+     * @uses AvroSchema::realParse()
+     */
+    protected static function subparse($avro, $default_namespace, &$schemata = null)
+    {
+        try {
+            return self::realParse($avro, $default_namespace, $schemata);
+        } catch (AvroSchemaParseException $e) {
+            // Already the expected type: pass it on unchanged.
+            throw $e;
+        } catch (\Exception $e) {
+            // Keep the original exception as "previous" so the real cause is
+            // not lost when the failure is re-reported as a parse error.
+            throw new AvroSchemaParseException(
+                sprintf(
+                    'Sub-schema is not a valid Avro schema. Bad schema: %s',
+                    print_r($avro, true)
+                ),
+                0,
+                $e
+            );
+        }
+    }
+
+    /**
+     * Accessor for the type name given to the constructor.
+     *
+     * @returns string schema type name of this schema
+     */
+    public function type()
+    {
+        return $this->type;
+    }
+
+    /**
+     * Serialises the toAvro() representation of this schema as JSON.
+     *
+     * @returns string the JSON-encoded representation of this Avro schema.
+     */
+    public function __toString()
+    {
+        $encoded = json_encode($this->toAvro());
+
+        // The cast keeps the return value a string whatever json_encode() returns.
+        return (string) $encoded;
+    }
+
+    /**
+     * @returns mixed an array holding this schema's type name under the
+     *          "type" attribute
+     */
+    public function toAvro()
+    {
+        $avro = array();
+        $avro[self::TYPE_ATTR] = $this->type;
+
+        return $avro;
+    }
+
+    /**
+     * Calls the method of this schema whose name is the given attribute name.
+     *
+     * @param string $attribute name of the accessor method to call
+     * @returns mixed value of the attribute with the given attribute name
+     */
+    public function attribute($attribute)
+    {
+        $accessor = $attribute;
+
+        return $this->{$accessor}();
+    }
+}
diff --git a/lang/php/test/IODatumReaderTest.php b/lang/php/lib/Schema/AvroSchemaParseException.php
similarity index 64%
copy from lang/php/test/IODatumReaderTest.php
copy to lang/php/lib/Schema/AvroSchemaParseException.php
index 7e1e7d9..d6b0678 100644
--- a/lang/php/test/IODatumReaderTest.php
+++ b/lang/php/lib/Schema/AvroSchemaParseException.php
@@ -1,4 +1,5 @@
<?php
+
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -17,20 +18,14 @@
* limitations under the License.
*/
-require_once('test_helper.php');
+namespace Apache\Avro\Schema;
-class IODatumReaderTest extends PHPUnit\Framework\TestCase
-{
+use Apache\Avro\AvroException;
- public function testSchemaMatching()
- {
- $writers_schema = <<<JSON
- { "type": "map",
- "values": "bytes" }
-JSON;
- $readers_schema = $writers_schema;
- $this->assertTrue(AvroIODatumReader::schemas_match(
- AvroSchema::parse($writers_schema),
- AvroSchema::parse($readers_schema)));
- }
+/**
+ * Exceptions associated with parsing JSON schema representations
+ * @package Avro
+ */
+class AvroSchemaParseException extends AvroException
+{
}
diff --git a/lang/php/lib/Schema/AvroUnionSchema.php b/lang/php/lib/Schema/AvroUnionSchema.php
new file mode 100644
index 0000000..796372a
--- /dev/null
+++ b/lang/php/lib/Schema/AvroUnionSchema.php
@@ -0,0 +1,123 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache\Avro\Schema;
+
+/**
+ * Union of Avro schemas, of which values can be of any of the schema in
+ * the union.
+ * @package Avro
+ */
+class AvroUnionSchema extends AvroSchema
+{
+    /**
+     * @var int[] list of indices of named schemas which
+     *      are defined in $schemata
+     */
+    public $schemaFromSchemataIndices;
+
+    /**
+     * @var AvroSchema[] list of schemas of this union
+     */
+    private $schemas;
+
+    /**
+     * @param array $schemas JSON-decoded list of the schemas in the union
+     * @param string $defaultNamespace namespace of enclosing schema
+     * @param AvroNamedSchemata|null &$schemata named schemas known so far;
+     *        a fresh collection is created when null is given
+     * @throws AvroSchemaParseException if a branch is invalid, a non-named
+     *         type occurs twice, or a branch is itself a union
+     */
+    public function __construct($schemas, $defaultNamespace, &$schemata = null)
+    {
+        parent::__construct(AvroSchema::UNION_SCHEMA);
+
+        // $schemata defaults to null; without this guard the schemaByName()
+        // lookup below would be a call on null.
+        if (is_null($schemata)) {
+            $schemata = new AvroNamedSchemata();
+        }
+
+        $this->schemaFromSchemataIndices = array();
+        // Initialised up front so that an empty union exposes an empty list
+        // instead of null to schemas(), schemaByIndex() and toAvro().
+        $this->schemas = array();
+
+        $seen_types = array();
+        foreach ($schemas as $index => $schema) {
+            // A string branch may refer to a named schema that is already
+            // known; anything else (or an unknown name) is parsed as a schema.
+            $new_schema = is_string($schema)
+                ? $schemata->schemaByName(new AvroName($schema, null, $defaultNamespace))
+                : null;
+            $is_schema_from_schemata = (bool) $new_schema;
+            if (!$is_schema_from_schemata) {
+                $new_schema = self::subparse($schema, $defaultNamespace, $schemata);
+            }
+
+            $schemaType = $new_schema->type;
+            if (
+                self::isValidType($schemaType)
+                && !self::isNamedType($schemaType)
+                && in_array($schemaType, $seen_types, true)
+            ) {
+                throw new AvroSchemaParseException(sprintf('"%s" is already in union', $schemaType));
+            }
+
+            if (AvroSchema::UNION_SCHEMA === $schemaType) {
+                throw new AvroSchemaParseException('Unions cannot contain other unions');
+            }
+
+            $seen_types[] = $schemaType;
+            $this->schemas[] = $new_schema;
+            if ($is_schema_from_schemata) {
+                $this->schemaFromSchemataIndices[] = $index;
+            }
+        }
+    }
+
+    /**
+     * @returns AvroSchema[] the schemas of this union, in declaration order
+     */
+    public function schemas()
+    {
+        return $this->schemas;
+    }
+
+    /**
+     * @param int $index zero-based position of the wanted schema
+     * @returns AvroSchema the particular schema from the union for
+     *          the given (zero-based) index.
+     * @throws AvroSchemaParseException if the index is invalid for this schema.
+     */
+    public function schemaByIndex($index)
+    {
+        // The lower bound matters too: a negative index used to pass the
+        // upper-bound check and then read an undefined offset.
+        if (0 <= $index && $index < count($this->schemas)) {
+            return $this->schemas[$index];
+        }
+
+        throw new AvroSchemaParseException('Invalid union schema index');
+    }
+
+    /**
+     * @returns mixed list with one entry per branch: the qualified name for
+     *          branches taken from the known named schemas, the full
+     *          representation otherwise
+     */
+    public function toAvro()
+    {
+        $avro = array();
+
+        foreach ($this->schemas as $index => $schema) {
+            $avro[] = in_array($index, $this->schemaFromSchemataIndices, true)
+                ? $schema->qualifiedName()
+                : $schema->toAvro();
+        }
+
+        return $avro;
+    }
+}
diff --git a/lang/php/lib/autoload.php b/lang/php/lib/autoload.php
new file mode 100644
index 0000000..5326aa7
--- /dev/null
+++ b/lang/php/lib/autoload.php
@@ -0,0 +1,61 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Load order matters: this file is meant for use without an autoloader, and
+// PHP cannot declare a class whose parent class has not been loaded yet.
+// require_once makes a missing file fatal and a repeated inclusion harmless.
+require_once __DIR__ . '/Avro.php';
+require_once __DIR__ . '/AvroDebug.php';
+require_once __DIR__ . '/AvroException.php';
+require_once __DIR__ . '/AvroGMP.php';
+require_once __DIR__ . '/AvroIO.php';
+require_once __DIR__ . '/AvroNotImplementedException.php';
+require_once __DIR__ . '/AvroUtil.php';
+
+require_once __DIR__ . '/DataFile/AvroDataIO.php';
+require_once __DIR__ . '/DataFile/AvroDataIOException.php';
+require_once __DIR__ . '/DataFile/AvroDataIOReader.php';
+require_once __DIR__ . '/DataFile/AvroDataIOWriter.php';
+
+require_once __DIR__ . '/Datum/AvroIOBinaryDecoder.php';
+require_once __DIR__ . '/Datum/AvroIOBinaryEncoder.php';
+require_once __DIR__ . '/Datum/AvroIODatumReader.php';
+require_once __DIR__ . '/Datum/AvroIODatumWriter.php';
+require_once __DIR__ . '/Datum/AvroIOSchemaMatchException.php';
+require_once __DIR__ . '/Datum/AvroIOTypeException.php';
+
+require_once __DIR__ . '/IO/AvroFile.php';
+require_once __DIR__ . '/IO/AvroIOException.php';
+require_once __DIR__ . '/IO/AvroStringIO.php';
+
+require_once __DIR__ . '/Protocol/AvroProtocol.php';
+require_once __DIR__ . '/Protocol/AvroProtocolMessage.php';
+require_once __DIR__ . '/Protocol/AvroProtocolParseException.php';
+
+// Base classes of the schema hierarchy first, then the classes extending them.
+require_once __DIR__ . '/Schema/AvroSchema.php';
+require_once __DIR__ . '/Schema/AvroNamedSchema.php';
+require_once __DIR__ . '/Schema/AvroArraySchema.php';
+require_once __DIR__ . '/Schema/AvroEnumSchema.php';
+require_once __DIR__ . '/Schema/AvroField.php';
+require_once __DIR__ . '/Schema/AvroFixedSchema.php';
+require_once __DIR__ . '/Schema/AvroMapSchema.php';
+require_once __DIR__ . '/Schema/AvroName.php';
+require_once __DIR__ . '/Schema/AvroNamedSchemata.php';
+require_once __DIR__ . '/Schema/AvroPrimitiveSchema.php';
+require_once __DIR__ . '/Schema/AvroRecordSchema.php';
+require_once __DIR__ . '/Schema/AvroSchemaParseException.php';
+require_once __DIR__ . '/Schema/AvroUnionSchema.php';
diff --git a/lang/php/lib/avro.php b/lang/php/lib/avro.php
deleted file mode 100644
index b7f2279..0000000
--- a/lang/php/lib/avro.php
+++ /dev/null
@@ -1,201 +0,0 @@
-<?php
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Avro library top-level file.
- *
- * This file in turn includes all files supporting the
- * Avro implementation.
- *
- * @package Avro
- */
-
-/**
- * General Avro exceptions.
- * @package Avro
- */
-class AvroException extends Exception {}
-
-/**
- * Avro "not implemented method" exception.
- * @package Avro
- */
-class AvroNotImplementedException extends AvroException {}
-
-/**
- * Library-level class for PHP Avro port.
- *
- * Contains library details such as version number and platform checks.
- *
- * This port is an implementation of the
- * {@link https://avro.apache.org/docs/1.3.3/spec.html Avro 1.3.3 Specification}
- *
- * @package Avro
- *
- */
-class Avro
-{
- /**
- * @var string version number of Avro specification to which
- * this implemenation complies
- */
- const SPEC_VERSION = '1.3.3';
-
- /**#@+
- * Constant to enumerate endianness.
- * @access private
- * @var int
- */
- const BIG_ENDIAN = 0x00;
- const LITTLE_ENDIAN = 0x01;
- /**#@-*/
-
- /**
- * Memoized result of self::set_endianness()
- * @var int self::BIG_ENDIAN or self::LITTLE_ENDIAN
- * @see self::set_endianness()
- */
- private static $endianness;
-
- /**#@+
- * Constant to enumerate biginteger handling mode.
- * GMP is used, if available, on 32-bit platforms.
- */
- const PHP_BIGINTEGER_MODE = 0x00;
- const GMP_BIGINTEGER_MODE = 0x01;
- /**#@-*/
-
- /**
- * @var int
- * Mode used to handle bigintegers. After Avro::check_64_bit() has been called,
- * (usually via a call to Avro::check_platform(), set to
- * self::GMP_BIGINTEGER_MODE on 32-bit platforms that have GMP available,
- * and to self::PHP_BIGINTEGER_MODE otherwise.
- */
- private static $biginteger_mode;
-
- /**
- * Wrapper method to call each required check.
- *
- */
- public static function check_platform()
- {
- self::check_64_bit();
- self::check_little_endian();
- }
-
- /**
- * Determines if the host platform can encode and decode long integer data.
- *
- * @throws AvroException if the platform cannot handle long integers.
- */
- private static function check_64_bit()
- {
- if (8 != PHP_INT_SIZE)
- if (extension_loaded('gmp'))
- self::$biginteger_mode = self::GMP_BIGINTEGER_MODE;
- else
- throw new AvroException('This platform cannot handle a 64-bit operations. '
- . 'Please install the GMP PHP extension.');
- else
- self::$biginteger_mode = self::PHP_BIGINTEGER_MODE;
-
- }
-
- /**
- * @returns boolean true if the PHP GMP extension is used and false otherwise.
- * @internal Requires Avro::check_64_bit() (exposed via Avro::check_platform())
- * to have been called to set Avro::$biginteger_mode.
- */
- static function uses_gmp()
- {
- return (self::GMP_BIGINTEGER_MODE == self::$biginteger_mode);
- }
-
- /**
- * Determines if the host platform is little endian,
- * required for processing double and float data.
- *
- * @throws AvroException if the platform is not little endian.
- */
- private static function check_little_endian()
- {
- if (!self::is_little_endian_platform())
- throw new AvroException('This is not a little-endian platform');
- }
-
- /**
- * Determines the endianness of the host platform and memoizes
- * the result to Avro::$endianness.
- *
- * Based on a similar check perfomed in https://pear.php.net/package/Math_BinaryUtils
- *
- * @throws AvroException if the endianness cannot be determined.
- */
- private static function set_endianness()
- {
- $packed = pack('d', 1);
- switch ($packed)
- {
- case "\77\360\0\0\0\0\0\0":
- self::$endianness = self::BIG_ENDIAN;
- break;
- case "\0\0\0\0\0\0\360\77":
- self::$endianness = self::LITTLE_ENDIAN;
- break;
- default:
- throw new AvroException(
- sprintf('Error determining platform endianness: %s',
- AvroDebug::hex_string($packed)));
- }
- }
-
- /**
- * @returns boolean true if the host platform is big endian
- * and false otherwise.
- * @uses self::set_endianness()
- */
- private static function is_big_endian_platform()
- {
- if (is_null(self::$endianness))
- self::set_endianness();
-
- return (self::BIG_ENDIAN == self::$endianness);
- }
-
- /**
- * @returns boolean true if the host platform is little endian,
- * and false otherwise.
- * @uses self::is_bin_endian_platform()
- */
- private static function is_little_endian_platform()
- {
- return !self::is_big_endian_platform();
- }
-
-}
-
-require_once('avro/util.php');
-require_once('avro/debug.php');
-require_once('avro/schema.php');
-require_once('avro/io.php');
-require_once('avro/gmp.php');
-require_once('avro/datum.php');
-require_once('avro/data_file.php');
-require_once('avro/protocol.php');
diff --git a/lang/php/lib/avro/data_file.php b/lang/php/lib/avro/data_file.php
deleted file mode 100644
index b8d15bc..0000000
--- a/lang/php/lib/avro/data_file.php
+++ /dev/null
@@ -1,601 +0,0 @@
-<?php
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Classes handling reading and writing from and to AvroIO objects
- * @package Avro
- */
-
-/**
- * Exception type used to report failures while working with AvroDataIO
- * object containers.
- *
- * @package Avro
- */
-class AvroDataIOException extends AvroException
-{
-}
-
-/**
- * Constants and factory helpers shared by the Avro object container
- * file reader (AvroDataIOReader) and writer (AvroDataIOWriter).
- *
- * @package Avro
- */
-class AvroDataIO
-{
-    /**
-     * @var int used in file header
-     */
-    const VERSION = 1;
-
-    /**
-     * @var int count of bytes in synchronization marker
-     */
-    const SYNC_SIZE = 16;
-
-    /**
-     * @var int buffer length at which AvroDataIOWriter::append() flushes
-     *          a block, arbitrarily set to 4000 * SYNC_SIZE
-     * @todo make this value configurable
-     */
-    const SYNC_INTERVAL = 64000;
-
-    /**
-     * @var string map key for datafile metadata codec value
-     */
-    const METADATA_CODEC_ATTR = 'avro.codec';
-
-    /**
-     * @var string map key for datafile metadata schema value
-     */
-    const METADATA_SCHEMA_ATTR = 'avro.schema';
-
-    /**
-     * @var string JSON for datafile metadata schema
-     */
-    const METADATA_SCHEMA_JSON = '{"type":"map","values":"bytes"}';
-
-    /**
-     * @var string codec value for NULL codec
-     */
-    const NULL_CODEC = 'null';
-
-    /**
-     * @var string codec value for deflate codec
-     */
-    const DEFLATE_CODEC = 'deflate';
-
-    /**
-     * @var string codec value for snappy codec
-     */
-    const SNAPPY_CODEC = 'snappy';
-
-    /**
-     * @var string codec value for zstandard codec
-     */
-    const ZSTANDARD_CODEC = 'zstandard';
-
-    /**
-     * @var string codec value for bzip2 codec
-     */
-    const BZIP2_CODEC = 'bzip2';
-
-    /**
-     * @var array array of valid codec names
-     */
-    private static $valid_codecs = [
-        self::NULL_CODEC,
-        self::DEFLATE_CODEC,
-        self::SNAPPY_CODEC,
-        self::ZSTANDARD_CODEC,
-        self::BZIP2_CODEC,
-    ];
-
-    /**
-     * @var AvroSchema cached version of metadata schema object
-     */
-    private static $metadata_schema;
-
-    /**
-     * @returns string the initial "magic" segment of an Avro container
-     *                 file header: 'Obj' followed by the packed VERSION.
-     */
-    public static function magic()
-    {
-        return ('Obj' . pack('c', self::VERSION));
-    }
-
-    /**
-     * @returns int count of bytes in the initial "magic" segment of the
-     *              Avro container file header
-     */
-    public static function magic_size()
-    {
-        return strlen(self::magic());
-    }
-
-    /**
-     * Parses METADATA_SCHEMA_JSON on first use and caches the result.
-     *
-     * @returns AvroSchema object of Avro container file metadata.
-     */
-    public static function metadata_schema()
-    {
-        if (is_null(self::$metadata_schema)) {
-            self::$metadata_schema = AvroSchema::parse(self::METADATA_SCHEMA_JSON);
-        }
-        return self::$metadata_schema;
-    }
-
-    /**
-     * Opens the file at $file_path for reading or writing Avro data.
-     *
-     * @param string $file_path file_path of file to open
-     * @param string $mode one of AvroFile::READ_MODE or AvroFile::WRITE_MODE
-     * @param string $schema_json JSON of the schema; required in write mode,
-     *                            passed on as the reader's schema in read mode
-     * @param string $codec compression codec, only used in write mode
-     * @returns AvroDataIOWriter|AvroDataIOReader a writer in write mode and
-     *                                            a reader in read mode
-     *
-     * @throws AvroDataIOException if no schema is given in write mode
-     *                             or if an invalid $mode is given.
-     */
-    public static function open_file(
-        $file_path,
-        $mode = AvroFile::READ_MODE,
-        $schema_json = null,
-        $codec = self::NULL_CODEC
-    ) {
-        $schema = !is_null($schema_json)
-            ? AvroSchema::parse($schema_json) : null;
-
-        $io = false;
-        switch ($mode) {
-            case AvroFile::WRITE_MODE:
-                if (is_null($schema)) {
-                    throw new AvroDataIOException('Writing an Avro file requires a schema.');
-                }
-                $file = new AvroFile($file_path, AvroFile::WRITE_MODE);
-                $io = self::open_writer($file, $schema, $codec);
-                break;
-            case AvroFile::READ_MODE:
-                $file = new AvroFile($file_path, AvroFile::READ_MODE);
-                $io = self::open_reader($file, $schema);
-                break;
-            default:
-                throw new AvroDataIOException(
-                    sprintf(
-                        "Only modes '%s' and '%s' allowed. You gave '%s'.",
-                        AvroFile::READ_MODE,
-                        AvroFile::WRITE_MODE,
-                        $mode
-                    )
-                );
-        }
-        return $io;
-    }
-
-    /**
-     * @returns array array of valid codecs
-     */
-    public static function valid_codecs()
-    {
-        return self::$valid_codecs;
-    }
-
-    /**
-     * @param string $codec
-     * @returns boolean true if $codec is a valid codec value and false otherwise
-     */
-    public static function is_valid_codec($codec)
-    {
-        // Strict comparison: with loose matching PHP < 8 accepts values such
-        // as 0 or true because they compare equal to the codec names.
-        return in_array($codec, self::valid_codecs(), true);
-    }
-
-    /**
-     * @param AvroIO $io
-     * @param AvroSchema $schema
-     * @param string $codec
-     * @returns AvroDataIOWriter
-     */
-    protected static function open_writer($io, $schema, $codec = self::NULL_CODEC)
-    {
-        $writer = new AvroIODatumWriter($schema);
-        return new AvroDataIOWriter($io, $writer, $schema, $codec);
-    }
-
-    /**
-     * @param AvroIO $io
-     * @param AvroSchema $schema used as the reader's schema
-     * @returns AvroDataIOReader
-     */
-    protected static function open_reader($io, $schema)
-    {
-        $reader = new AvroIODatumReader(null, $schema);
-        return new AvroDataIOReader($io, $reader);
-    }
-}
-
-/**
- * Reads Avro data from an AvroIO source using an AvroSchema.
- *
- * @package Avro
- */
-class AvroDataIOReader
-{
-    /**
-     * @var AvroIO source the object container is read from
-     */
-    private $io;
-
-    /**
-     * @var AvroIOBinaryDecoder decoder reading directly from $io
-     */
-    private $decoder;
-
-    /**
-     * @var AvroIODatumReader
-     */
-    private $datum_reader;
-
-    /**
-     * @var string sync marker read from the container header
-     */
-    public $sync_marker;
-
-    /**
-     * @var array object container metadata
-     */
-    public $metadata;
-
-    /**
-     * @var int count of items left to read in the current block
-     */
-    private $block_count;
-
-    /**
-     * @var string|null compression codec named in the container metadata
-     */
-    private $codec;
-
-    /**
-     * @param AvroIO $io source from which to read
-     * @param AvroIODatumReader $datum_reader reader that understands
-     *                                        the data schema
-     * @throws AvroDataIOException if $io is not an instance of AvroIO
-     *                             or the codec specified in the header
-     *                             is not supported
-     * @uses read_header()
-     */
-    public function __construct($io, $datum_reader)
-    {
-        if (!($io instanceof AvroIO)) {
-            throw new AvroDataIOException('io must be instance of AvroIO');
-        }
-
-        $this->io = $io;
-        $this->decoder = new AvroIOBinaryDecoder($this->io);
-        $this->datum_reader = $datum_reader;
-        $this->read_header();
-
-        $codec = AvroUtil::array_value(
-            $this->metadata,
-            AvroDataIO::METADATA_CODEC_ATTR
-        );
-        if ($codec && !AvroDataIO::is_valid_codec($codec)) {
-            throw new AvroDataIOException(sprintf('Unknown codec: %s', $codec));
-        }
-        $this->codec = $codec;
-
-        $this->block_count = 0;
-        // FIXME: Seems unsanitary to set writers_schema here.
-        // Can't constructor take it as an argument?
-        $this->datum_reader->set_writers_schema(
-            AvroSchema::parse($this->metadata[AvroDataIO::METADATA_SCHEMA_ATTR])
-        );
-    }
-
-    /**
-     * Reads header of object container: the magic bytes, the metadata map
-     * and the sync marker.
-     *
-     * @throws AvroDataIOException if the file is not an Avro data file.
-     */
-    private function read_header()
-    {
-        $this->seek(0, AvroIO::SEEK_SET);
-
-        $magic = $this->read(AvroDataIO::magic_size());
-
-        if (strlen($magic) < AvroDataIO::magic_size()) {
-            throw new AvroDataIOException(
-                'Not an Avro data file: shorter than the Avro magic block'
-            );
-        }
-
-        if (AvroDataIO::magic() != $magic) {
-            throw new AvroDataIOException(
-                sprintf(
-                    'Not an Avro data file: %s does not match %s',
-                    $magic,
-                    AvroDataIO::magic()
-                )
-            );
-        }
-
-        $this->metadata = $this->datum_reader->read_data(
-            AvroDataIO::metadata_schema(),
-            AvroDataIO::metadata_schema(),
-            $this->decoder
-        );
-        $this->sync_marker = $this->read(AvroDataIO::SYNC_SIZE);
-    }
-
-    /**
-     * Reads every remaining datum of the container into an array.
-     *
-     * @internal Would be nice to implement data() as an iterator, I think
-     * @returns array of data from object container.
-     * @throws AvroException if the extension required by the codec is not
-     *                       loaded
-     * @throws AvroDataIOException if a compressed block cannot be
-     *                             decompressed
-     */
-    public function data()
-    {
-        $data = array();
-        $decoder = null;
-        while (true) {
-            if (0 == $this->block_count) {
-                if ($this->is_eof()) {
-                    break;
-                }
-
-                // A sync marker may separate this block from the previous
-                // one; the file may also end right after it.
-                if ($this->skip_sync() && $this->is_eof()) {
-                    break;
-                }
-
-                $length = $this->read_block_header();
-                $decoder = $this->block_decoder($length);
-            }
-            $data[] = $this->datum_reader->read($decoder);
-            $this->block_count -= 1;
-        }
-        return $data;
-    }
-
-    /**
-     * Returns the decoder to use for the block that starts at the current
-     * position: the plain container decoder when the block is not
-     * compressed, otherwise a decoder over the decompressed block bytes.
-     *
-     * @param int $length length in bytes of the (compressed) block
-     * @returns AvroIOBinaryDecoder
-     * @throws AvroException if the extension required by the codec is not
-     *                       loaded
-     * @throws AvroDataIOException if the block cannot be decompressed
-     */
-    private function block_decoder($length)
-    {
-        if (AvroDataIO::DEFLATE_CODEC === $this->codec) {
-            $datum = $this->require_decompressed(
-                gzinflate($this->decoder->read($length))
-            );
-        } elseif (AvroDataIO::ZSTANDARD_CODEC === $this->codec) {
-            if (!extension_loaded('zstd')) {
-                throw new AvroException('Please install ext-zstd to use zstandard compression.');
-            }
-            $datum = $this->require_decompressed(
-                zstd_uncompress($this->decoder->read($length))
-            );
-        } elseif (AvroDataIO::SNAPPY_CODEC === $this->codec) {
-            if (!extension_loaded('snappy')) {
-                throw new AvroException('Please install ext-snappy to use snappy compression.');
-            }
-            $compressed = $this->decoder->read($length);
-            // The last four bytes hold a big-endian CRC32 (see the matching
-            // pack('a*N', ...) in AvroDataIOWriter::write_block()).
-            $crc32 = unpack('N', substr($compressed, -4))[1];
-            $datum = $this->require_decompressed(
-                snappy_uncompress(substr($compressed, 0, -4))
-            );
-            if ($crc32 !== crc32($datum)) {
-                // NOTE(review): on a checksum mismatch the data is
-                // decompressed a second time instead of being rejected.
-                // Behaviour kept as found; confirm whether this is intended.
-                $datum = $this->require_decompressed(snappy_uncompress($datum));
-            }
-        } elseif (AvroDataIO::BZIP2_CODEC === $this->codec) {
-            if (!extension_loaded('bz2')) {
-                throw new AvroException('Please install ext-bz2 to use bzip2 compression.');
-            }
-            $datum = $this->require_decompressed(
-                bzdecompress($this->decoder->read($length))
-            );
-        } else {
-            // No (or the null) codec: items are read straight from the source.
-            return $this->decoder;
-        }
-
-        return new AvroIOBinaryDecoder(new AvroStringIO($datum));
-    }
-
-    /**
-     * Guards against failed decompression. The decompression functions
-     * signal failure with a non-string result (false, or an error number
-     * in the case of bzdecompress()).
-     *
-     * @param mixed $datum result of a decompression call
-     * @returns string $datum unchanged
-     * @throws AvroDataIOException if $datum is not a string
-     */
-    private function require_decompressed($datum)
-    {
-        if (!is_string($datum)) {
-            throw new AvroDataIOException(
-                sprintf('Unable to decompress %s block', $this->codec)
-            );
-        }
-        return $datum;
-    }
-
-    /**
-     * Closes this reader (and its AvroIO object.)
-     * @uses AvroIO::close()
-     */
-    public function close()
-    {
-        return $this->io->close();
-    }
-
-    /**
-     * @uses AvroIO::seek()
-     */
-    private function seek($offset, $whence)
-    {
-        return $this->io->seek($offset, $whence);
-    }
-
-    /**
-     * @uses AvroIO::read()
-     */
-    private function read($len)
-    {
-        return $this->io->read($len);
-    }
-
-    /**
-     * @uses AvroIO::is_eof()
-     */
-    private function is_eof()
-    {
-        return $this->io->is_eof();
-    }
-
-    /**
-     * Consumes the next SYNC_SIZE bytes if they equal the sync marker and
-     * seeks back over them if they do not.
-     *
-     * @returns boolean true if a sync marker was skipped, false otherwise
-     */
-    private function skip_sync()
-    {
-        $proposed_sync_marker = $this->read(AvroDataIO::SYNC_SIZE);
-        if ($proposed_sync_marker != $this->sync_marker) {
-            $this->seek(-AvroDataIO::SYNC_SIZE, AvroIO::SEEK_CUR);
-            return false;
-        }
-        return true;
-    }
-
-    /**
-     * Reads the block header (which includes the count of items in the block
-     * and the length in bytes of the block)
-     * @returns int length in bytes of the block.
-     */
-    private function read_block_header()
-    {
-        $this->block_count = $this->decoder->read_long();
-        return $this->decoder->read_long();
-    }
-}
-
-/**
- * Writes Avro data to an AvroIO source using an AvroSchema
- *
- * @package Avro
- */
-class AvroDataIOWriter
-{
-    /**
-     * @var AvroIO object container where data is written
-     */
-    private $io;
-
-    /**
-     * @var AvroIOBinaryEncoder encoder for object container
-     */
-    private $encoder;
-
-    /**
-     * @var AvroIODatumWriter
-     */
-    private $datum_writer;
-
-    /**
-     * @var AvroStringIO buffer for writing
-     */
-    private $buffer;
-
-    /**
-     * @var AvroIOBinaryEncoder encoder for buffer
-     */
-    private $buffer_encoder;
-
-    /**
-     * @var int count of items written to block
-     */
-    private $block_count;
-
-    /**
-     * @var array map of object container metadata
-     */
-    private $metadata;
-
-    /**
-     * @var string compression codec
-     */
-    private $codec;
-
-    /**
-     * Declared explicitly instead of being created on first assignment;
-     * kept public because an undeclared property was publicly readable.
-     *
-     * @var string sync marker written after the header and after each block
-     */
-    public $sync_marker;
-
-    /**
-     * @returns string a new, unique sync marker.
-     */
-    private static function generate_sync_marker()
-    {
-        // From https://php.net/manual/en/function.mt-rand.php comments
-        return pack(
-            'S8',
-            mt_rand(0, 0xffff),
-            mt_rand(0, 0xffff),
-            mt_rand(0, 0xffff),
-            mt_rand(0, 0xffff) | 0x4000,
-            mt_rand(0, 0xffff) | 0x8000,
-            mt_rand(0, 0xffff),
-            mt_rand(0, 0xffff),
-            mt_rand(0, 0xffff)
-        );
-    }
-
-    /**
-     * Starts a new container when $writers_schema is given; otherwise reads
-     * the header already present in $io and positions $io at its end so
-     * that new blocks are appended.
-     *
-     * @param AvroIO $io
-     * @param AvroIODatumWriter $datum_writer
-     * @param AvroSchema $writers_schema
-     * @param string $codec
-     * @throws AvroDataIOException if $io is not an instance of AvroIO or
-     *                             $codec is not a valid codec
-     */
-    public function __construct($io, $datum_writer, $writers_schema = null, $codec = AvroDataIO::NULL_CODEC)
-    {
-        if (!($io instanceof AvroIO)) {
-            throw new AvroDataIOException('io must be instance of AvroIO');
-        }
-
-        $this->io = $io;
-        $this->encoder = new AvroIOBinaryEncoder($this->io);
-        $this->datum_writer = $datum_writer;
-        $this->buffer = new AvroStringIO();
-        $this->buffer_encoder = new AvroIOBinaryEncoder($this->buffer);
-        $this->block_count = 0;
-        $this->metadata = array();
-
-        if ($writers_schema) {
-            if (!AvroDataIO::is_valid_codec($codec)) {
-                throw new AvroDataIOException(
-                    sprintf('codec %s is not supported', $codec)
-                );
-            }
-
-            $this->sync_marker = self::generate_sync_marker();
-            $this->metadata[AvroDataIO::METADATA_CODEC_ATTR] = $this->codec = $codec;
-            $this->metadata[AvroDataIO::METADATA_SCHEMA_ATTR] = (string) $writers_schema;
-            $this->write_header();
-        } else {
-            // Reuse sync marker, codec and schema of the existing container.
-            $dfr = new AvroDataIOReader($this->io, new AvroIODatumReader());
-            $this->sync_marker = $dfr->sync_marker;
-            $this->metadata[AvroDataIO::METADATA_CODEC_ATTR] = $this->codec
-                = $dfr->metadata[AvroDataIO::METADATA_CODEC_ATTR];
-            $schema_from_file = $dfr->metadata[AvroDataIO::METADATA_SCHEMA_ATTR];
-            $this->metadata[AvroDataIO::METADATA_SCHEMA_ATTR] = $schema_from_file;
-            $this->datum_writer->writers_schema = AvroSchema::parse($schema_from_file);
-            $this->seek(0, SEEK_END);
-        }
-    }
-
-    /**
-     * Buffers $datum and writes the buffered block out once the buffer has
-     * reached AvroDataIO::SYNC_INTERVAL.
-     *
-     * @param mixed $datum
-     */
-    public function append($datum)
-    {
-        $this->datum_writer->write($datum, $this->buffer_encoder);
-        $this->block_count++;
-
-        if ($this->buffer->length() >= AvroDataIO::SYNC_INTERVAL) {
-            $this->write_block();
-        }
-    }
-
-    /**
-     * Flushes buffer to AvroIO object container and closes it.
-     * @return mixed value of $io->close()
-     * @see AvroIO::close()
-     */
-    public function close()
-    {
-        $this->flush();
-        return $this->io->close();
-    }
-
-    /**
-     * Flushes buffer to AvroIO object container.
-     * @returns mixed value of $io->flush()
-     * @see AvroIO::flush()
-     */
-    private function flush()
-    {
-        $this->write_block();
-        return $this->io->flush();
-    }
-
-    /**
-     * Writes a block of data to the AvroIO object container: item count,
-     * byte length, the (possibly compressed) items and the sync marker.
-     * Does nothing when no items are buffered.
-     *
-     * @throws AvroException if the extension required by the codec is not
-     *                       loaded
-     */
-    private function write_block()
-    {
-        if ($this->block_count > 0) {
-            $this->encoder->write_long($this->block_count);
-            $to_write = (string) $this->buffer;
-
-            if ($this->codec === AvroDataIO::DEFLATE_CODEC) {
-                $to_write = gzdeflate($to_write);
-            } elseif ($this->codec === AvroDataIO::ZSTANDARD_CODEC) {
-                if (!extension_loaded('zstd')) {
-                    throw new AvroException('Please install ext-zstd to use zstandard compression.');
-                }
-                $to_write = zstd_compress($to_write);
-            } elseif ($this->codec === AvroDataIO::SNAPPY_CODEC) {
-                if (!extension_loaded('snappy')) {
-                    throw new AvroException('Please install ext-snappy to use snappy compression.');
-                }
-                // The CRC32 of the uncompressed data trails the compressed
-                // bytes as a big-endian 32-bit value.
-                $crc32 = crc32($to_write);
-                $compressed = snappy_compress($to_write);
-                $to_write = pack('a*N', $compressed, $crc32);
-            } elseif ($this->codec === AvroDataIO::BZIP2_CODEC) {
-                if (!extension_loaded('bz2')) {
-                    throw new AvroException('Please install ext-bz2 to use bzip2 compression.');
-                }
-                $to_write = bzcompress($to_write);
-            }
-
-            $this->encoder->write_long(strlen($to_write));
-            $this->write($to_write);
-            $this->write($this->sync_marker);
-            $this->buffer->truncate();
-            $this->block_count = 0;
-        }
-    }
-
-    /**
-     * Writes the header of the AvroIO object container
-     */
-    private function write_header()
-    {
-        $this->write(AvroDataIO::magic());
-        $this->datum_writer->write_data(
-            AvroDataIO::metadata_schema(),
-            $this->metadata,
-            $this->encoder
-        );
-        $this->write($this->sync_marker);
-    }
-
-    /**
-     * @param string $bytes
-     * @uses AvroIO::write()
-     */
-    private function write($bytes)
-    {
-        return $this->io->write($bytes);
-    }
-
-    /**
-     * @param int $offset
-     * @param int $whence
-     * @uses AvroIO::seek()
-     */
-    private function seek($offset, $whence)
-    {
-        return $this->io->seek($offset, $whence);
-    }
-}
diff --git a/lang/php/lib/avro/datum.php b/lang/php/lib/avro/datum.php
deleted file mode 100644
index 68cb4ea..0000000
--- a/lang/php/lib/avro/datum.php
+++ /dev/null
@@ -1,984 +0,0 @@
-<?php
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Classes for reading and writing Avro data to AvroIO objects.
- *
- * @package Avro
- *
- * @todo Implement JSON encoding, as is required by the Avro spec.
- */
-
-/**
- * Exception reporting that a datum does not conform to the schema it
- * was expected to match.
- *
- * @package Avro
- */
-class AvroIOTypeException extends AvroException
-{
-    /**
-     * Builds the message from an export of the datum and the schema.
-     *
-     * @param AvroSchema $expected_schema
-     * @param mixed $datum
-     */
-    public function __construct($expected_schema, $datum)
-    {
-        $message = sprintf(
-            'The datum %s is not an example of schema %s',
-            var_export($datum, true),
-            $expected_schema
-        );
-
-        parent::__construct($message);
-    }
-}
-
-/**
- * Exception reporting that a reader's schema and a writer's schema
- * are incompatible with each other.
- *
- * @package Avro
- */
-class AvroIOSchemaMatchException extends AvroException
-{
-    /**
-     * Builds the message from both schemas.
-     *
-     * @param AvroSchema $writers_schema
-     * @param AvroSchema $readers_schema
-     */
-    public function __construct($writers_schema, $readers_schema)
-    {
-        $message = sprintf(
-            "Writer's schema %s and Reader's schema %s do not match.",
-            $writers_schema,
-            $readers_schema
-        );
-
-        parent::__construct($message);
-    }
-}
-
-/**
- * Writes data to an encoder according to a schema.
- *
- * Every datum is validated against the schema before it is written.
- *
- * @package Avro
- */
-class AvroIODatumWriter
-{
-    /**
-     * Schema used by this instance to write Avro data.
-     * @var AvroSchema
-     */
-    public $writers_schema;
-
-    /**
-     * @param AvroSchema $writers_schema
-     */
-    public function __construct($writers_schema = null)
-    {
-        $this->writers_schema = $writers_schema;
-    }
-
-    /**
-     * Validates $datum against $writers_schema and writes it with the
-     * encoder method (or private helper) matching the schema type.
-     *
-     * @param AvroSchema $writers_schema
-     * @param $datum
-     * @param AvroIOBinaryEncoder $encoder
-     * @returns mixed
-     *
-     * @throws AvroIOTypeException if $datum is invalid for $writers_schema
-     * @throws AvroException if the schema type is not known
-     */
-    public function write_data($writers_schema, $datum, $encoder)
-    {
-        if (!AvroSchema::is_valid_datum($writers_schema, $datum)) {
-            throw new AvroIOTypeException($writers_schema, $datum);
-        }
-
-        switch ($writers_schema->type()) {
-            case AvroSchema::NULL_TYPE:
-                return $encoder->write_null($datum);
-            case AvroSchema::BOOLEAN_TYPE:
-                return $encoder->write_boolean($datum);
-            case AvroSchema::INT_TYPE:
-                return $encoder->write_int($datum);
-            case AvroSchema::LONG_TYPE:
-                return $encoder->write_long($datum);
-            case AvroSchema::FLOAT_TYPE:
-                return $encoder->write_float($datum);
-            case AvroSchema::DOUBLE_TYPE:
-                return $encoder->write_double($datum);
-            case AvroSchema::STRING_TYPE:
-                return $encoder->write_string($datum);
-            case AvroSchema::BYTES_TYPE:
-                return $encoder->write_bytes($datum);
-            case AvroSchema::ARRAY_SCHEMA:
-                return $this->write_array($writers_schema, $datum, $encoder);
-            case AvroSchema::MAP_SCHEMA:
-                return $this->write_map($writers_schema, $datum, $encoder);
-            case AvroSchema::FIXED_SCHEMA:
-                return $this->write_fixed($writers_schema, $datum, $encoder);
-            case AvroSchema::ENUM_SCHEMA:
-                return $this->write_enum($writers_schema, $datum, $encoder);
-            case AvroSchema::RECORD_SCHEMA:
-            case AvroSchema::ERROR_SCHEMA:
-            case AvroSchema::REQUEST_SCHEMA:
-                return $this->write_record($writers_schema, $datum, $encoder);
-            case AvroSchema::UNION_SCHEMA:
-                return $this->write_union($writers_schema, $datum, $encoder);
-            default:
-                $message = sprintf('Unknown type: %s', $writers_schema->type);
-                throw new AvroException($message);
-        }
-    }
-
-    /**
-     * Writes $datum using this instance's writer's schema.
-     *
-     * @param $datum
-     * @param AvroIOBinaryEncoder $encoder
-     */
-    public function write($datum, $encoder)
-    {
-        $this->write_data($this->writers_schema, $datum, $encoder);
-    }
-
-    /**#@+
-     * @param AvroSchema $writers_schema
-     * @param null|boolean|int|float|string|array $datum item to be written
-     * @param AvroIOBinaryEncoder $encoder
-     */
-    private function write_array($writers_schema, $datum, $encoder)
-    {
-        $item_count = count($datum);
-        if ($item_count > 0) {
-            $encoder->write_long($item_count);
-            $item_schema = $writers_schema->items();
-            foreach ($datum as $element) {
-                $this->write_data($item_schema, $element, $encoder);
-            }
-        }
-
-        // A zero count terminates the array.
-        return $encoder->write_long(0);
-    }
-
-    private function write_map($writers_schema, $datum, $encoder)
-    {
-        $pair_count = count($datum);
-        if (0 < $pair_count) {
-            $encoder->write_long($pair_count);
-            foreach ($datum as $key => $value) {
-                $encoder->write_string($key);
-                $this->write_data($writers_schema->values(), $value, $encoder);
-            }
-        }
-
-        // A zero count terminates the map.
-        $encoder->write_long(0);
-    }
-
-    private function write_union($writers_schema, $datum, $encoder)
-    {
-        $branch_index = -1;
-        $branch_schema = null;
-
-        // The first branch the datum is valid for wins.
-        foreach ($writers_schema->schemas() as $index => $candidate) {
-            if (AvroSchema::is_valid_datum($candidate, $datum)) {
-                $branch_index = $index;
-                $branch_schema = $candidate;
-                break;
-            }
-        }
-
-        if (null === $branch_schema) {
-            throw new AvroIOTypeException($writers_schema, $datum);
-        }
-
-        $encoder->write_long($branch_index);
-        $this->write_data($branch_schema, $datum, $encoder);
-    }
-
-    private function write_enum($writers_schema, $datum, $encoder)
-    {
-        return $encoder->write_int($writers_schema->symbol_index($datum));
-    }
-
-    private function write_fixed($writers_schema, $datum, $encoder)
-    {
-        /**
-         * NOTE Unused $writers_schema parameter included for consistency
-         * with other write_* methods.
-         */
-        return $encoder->write($datum);
-    }
-
-    private function write_record($writers_schema, $datum, $encoder)
-    {
-        foreach ($writers_schema->fields() as $field) {
-            $field_schema = $field->type();
-            $this->write_data($field_schema, $datum[$field->name()], $encoder);
-        }
-    }
-
-    /**#@-*/
-}
-
-/**
- * Encodes and writes Avro data to an AvroIO object using
- * Avro binary encoding.
- *
- * @package Avro
- */
-class AvroIOBinaryEncoder
-{
-    /**
-     * @var AvroIO
-     */
-    private $io;
-
-    /**
-     * Performs encoding of the given float value to a binary string
-     *
-     * XXX: This is <b>not</b> endian-aware! The {@link Avro::check_platform()}
-     * called in {@link AvroIOBinaryEncoder::__construct()} should ensure the
-     * library is only used on little-endian platforms, which ensure the little-endian
-     * encoding required by the Avro spec.
-     *
-     * @param float $float
-     * @returns string bytes
-     * @see Avro::check_platform()
-     */
-    public static function float_to_int_bits($float)
-    {
-        return pack('f', (float) $float);
-    }
-
-    /**
-     * Performs encoding of the given double value to a binary string
-     *
-     * XXX: This is <b>not</b> endian-aware! See comments in
-     * {@link AvroIOBinaryEncoder::float_to_int_bits()} for details.
-     *
-     * @param double $double
-     * @returns string bytes
-     */
-    public static function double_to_long_bits($double)
-    {
-        return pack('d', (double) $double);
-    }
-
-    /**
-     * Encodes $n as a zig-zag, variable-length integer.
-     *
-     * @param int|string $n
-     * @returns string long $n encoded as bytes
-     * @internal This relies on 64-bit PHP.
-     */
-    public static function encode_long($n)
-    {
-        $n = (int) $n;
-        // Zig-zag: move the sign into the lowest bit.
-        $n = ($n << 1) ^ ($n >> 63);
-
-        // PHP's >> is an arithmetic shift. The zig-zag value has its top bit
-        // set for |n| >= 2^62, so it must be shifted as an unsigned value:
-        // the mask clears the sign-extended high bits, otherwise the value
-        // never reaches zero and the loop below does not terminate.
-        $unsigned_shift_mask = PHP_INT_MAX >> 6;
-
-        $str = '';
-        while (0 != ($n & ~0x7F)) {
-            // Low seven bits plus the continuation flag.
-            $str .= chr(($n & 0x7F) | 0x80);
-            $n = ($n >> 7) & $unsigned_shift_mask;
-        }
-        $str .= chr($n);
-        return $str;
-    }
-
-    /**
-     * @param AvroIO $io object to which data is to be written.
-     */
-    public function __construct($io)
-    {
-        Avro::check_platform();
-        $this->io = $io;
-    }
-
-    /**
-     * Writes nothing.
-     *
-     * @param null $datum actual value is ignored
-     */
-    public function write_null($datum)
-    {
-        return null;
-    }
-
-    /**
-     * Writes a single byte: 1 for a truthy $datum, 0 otherwise.
-     *
-     * @param boolean $datum
-     */
-    public function write_boolean($datum)
-    {
-        $byte = $datum ? chr(1) : chr(0);
-        $this->write($byte);
-    }
-
-    /**
-     * @param int $datum
-     * @uses self::write_long()
-     */
-    public function write_int($datum)
-    {
-        $this->write_long($datum);
-    }
-
-    /**
-     * Writes $n with AvroGMP::encode_long() when Avro::uses_gmp() says so
-     * and with self::encode_long() otherwise.
-     *
-     * @param int $n
-     */
-    public function write_long($n)
-    {
-        if (Avro::uses_gmp()) {
-            $this->write(AvroGMP::encode_long($n));
-        } else {
-            $this->write(self::encode_long($n));
-        }
-    }
-
-    /**
-     * @param float $datum
-     * @uses self::float_to_int_bits()
-     */
-    public function write_float($datum)
-    {
-        $this->write(self::float_to_int_bits($datum));
-    }
-
-    /**
-     * @param float $datum
-     * @uses self::double_to_long_bits()
-     */
-    public function write_double($datum)
-    {
-        $this->write(self::double_to_long_bits($datum));
-    }
-
-    /**
-     * @param string $str
-     * @uses self::write_bytes()
-     */
-    public function write_string($str)
-    {
-        $this->write_bytes($str);
-    }
-
-    /**
-     * Writes the length of $bytes followed by $bytes itself.
-     *
-     * @param string $bytes
-     */
-    public function write_bytes($bytes)
-    {
-        $this->write_long(strlen($bytes));
-        $this->write($bytes);
-    }
-
-    /**
-     * @param string $datum
-     * @uses AvroIO::write()
-     */
-    public function write($datum)
-    {
-        $this->io->write($datum);
-    }
-}
-
-/**
- * Handles schema-specifc reading of data from the decoder.
- *
- * Also handles schema resolution between the reader and writer
- * schemas (if a writer's schema is provided).
- *
- * @package Avro
- */
-class AvroIODatumReader
-{
- /**
-  * Decides whether data written with $writers_schema may be read using
-  * $readers_schema.
-  *
-  * Any pairing that involves a union is accepted here. Equal types are
-  * compared by the attributes relevant to that type, and differing types
-  * match only for the numeric promotions int -> long/float/double,
-  * long -> float/double and float -> double.
-  *
-  * @param AvroSchema $writers_schema
-  * @param AvroSchema $readers_schema
-  * @returns boolean true if the schemas are consistent with
-  *                  each other and false otherwise.
-  */
- static function schemas_match($writers_schema, $readers_schema)
- {
-     $writers_schema_type = $writers_schema->type;
-     $readers_schema_type = $readers_schema->type;
-
-     if (AvroSchema::UNION_SCHEMA == $writers_schema_type
-         || AvroSchema::UNION_SCHEMA == $readers_schema_type) {
-         return true;
-     }
-
-     if ($writers_schema_type == $readers_schema_type) {
-         if (AvroSchema::is_primitive_type($writers_schema_type)) {
-             return true;
-         }
-
-         switch ($readers_schema_type) {
-             case AvroSchema::MAP_SCHEMA:
-                 return self::attributes_match(
-                     $writers_schema->values(),
-                     $readers_schema->values(),
-                     array(AvroSchema::TYPE_ATTR)
-                 );
-             case AvroSchema::ARRAY_SCHEMA:
-                 return self::attributes_match(
-                     $writers_schema->items(),
-                     $readers_schema->items(),
-                     array(AvroSchema::TYPE_ATTR)
-                 );
-             case AvroSchema::ENUM_SCHEMA:
-                 return self::attributes_match(
-                     $writers_schema,
-                     $readers_schema,
-                     array(AvroSchema::FULLNAME_ATTR)
-                 );
-             case AvroSchema::FIXED_SCHEMA:
-                 return self::attributes_match(
-                     $writers_schema,
-                     $readers_schema,
-                     array(AvroSchema::FULLNAME_ATTR, AvroSchema::SIZE_ATTR)
-                 );
-             case AvroSchema::RECORD_SCHEMA:
-             case AvroSchema::ERROR_SCHEMA:
-                 return self::attributes_match(
-                     $writers_schema,
-                     $readers_schema,
-                     array(AvroSchema::FULLNAME_ATTR)
-                 );
-             case AvroSchema::REQUEST_SCHEMA:
-                 // XXX: This seems wrong
-                 return true;
-         }
-
-         // Equal, non-primitive type that is not handled above.
-         return false;
-     }
-
-     // From here on the types differ. These promotion checks used to sit
-     // inside the equal-type branch above, where they could never succeed,
-     // and differing types fell off the end of the function without
-     // returning a boolean.
-     if (AvroSchema::INT_TYPE == $writers_schema_type
-         && in_array($readers_schema_type, array(AvroSchema::LONG_TYPE,
-                                                 AvroSchema::FLOAT_TYPE,
-                                                 AvroSchema::DOUBLE_TYPE))) {
-         return true;
-     }
-
-     if (AvroSchema::LONG_TYPE == $writers_schema_type
-         && in_array($readers_schema_type, array(AvroSchema::FLOAT_TYPE,
-                                                 AvroSchema::DOUBLE_TYPE))) {
-         return true;
-     }
-
-     if (AvroSchema::FLOAT_TYPE == $writers_schema_type
-         && AvroSchema::DOUBLE_TYPE == $readers_schema_type) {
-         return true;
-     }
-
-     return false;
- }
-
- /**
-  * Compares the named attributes of two schemas.
-  *
-  * @param AvroSchema $schema_one
-  * @param AvroSchema $schema_two
-  * @param string[] $attribute_names array of string attribute names to compare
-  *
-  * @returns boolean false as soon as one attribute differs between the
-  *                  schemas, true if none does.
-  */
- static function attributes_match($schema_one, $schema_two, $attribute_names)
- {
-     foreach ($attribute_names as $name) {
-         $first = $schema_one->attribute($name);
-         $second = $schema_two->attribute($name);
-         if ($first != $second) {
-             return false;
-         }
-     }
-
-     return true;
- }
-
- /**
- * @var AvroSchema
- */
- private $writers_schema;
-
- /**
- * @var AvroSchema
- */
- private $readers_schema;
-
- /**
-  * Stores the two schemas; both are optional.
-  *
-  * @param AvroSchema $writers_schema
-  * @param AvroSchema $readers_schema
-  */
- public function __construct($writers_schema = null, $readers_schema = null)
- {
-     $this->readers_schema = $readers_schema;
-     $this->writers_schema = $writers_schema;
- }
-
- /**
- * Sets the writer's schema of this reader.
- *
- * @param AvroSchema $readers_schema the new writer's schema; despite the
- * parameter name, the value is stored in $this->writers_schema.
- */
- public function set_writers_schema($readers_schema)
- {
- $this->writers_schema = $readers_schema;
- }
-
- /**
-  * Reads one datum from $decoder. When no reader's schema has been set,
-  * the writer's schema is adopted as the reader's schema first.
-  *
-  * @param AvroIOBinaryDecoder $decoder
-  * @returns mixed the value returned by read_data()
-  */
- public function read($decoder)
- {
-     if (null === $this->readers_schema) {
-         $this->readers_schema = $this->writers_schema;
-     }
-
-     return $this->read_data(
-         $this->writers_schema,
-         $this->readers_schema,
-         $decoder
-     );
- }
-
- /**#@+
- * @param AvroSchema $writers_schema
- * @param AvroSchema $readers_schema
- * @param AvroIOBinaryDecoder $decoder
- */
- /**
-  * Reads one datum, dispatching on the type of the writer's schema.
-  *
-  * @returns mixed
-  * @throws AvroIOSchemaMatchException if the two schemas do not match
-  * @throws AvroException if the writer's schema type is not known
-  */
- public function read_data($writers_schema, $readers_schema, $decoder)
- {
-     if (!self::schemas_match($writers_schema, $readers_schema)) {
-         throw new AvroIOSchemaMatchException($writers_schema, $readers_schema);
-     }
-
-     // Schema resolution: reader's schema is a union, writer's schema is not
-     if (AvroSchema::UNION_SCHEMA == $readers_schema->type()
-         && AvroSchema::UNION_SCHEMA != $writers_schema->type()) {
-         // Read with the first branch of the union that matches.
-         foreach ($readers_schema->schemas() as $branch) {
-             if (self::schemas_match($writers_schema, $branch)) {
-                 return $this->read_data($writers_schema, $branch, $decoder);
-             }
-         }
-         throw new AvroIOSchemaMatchException($writers_schema, $readers_schema);
-     }
-
-     switch ($writers_schema->type()) {
-         case AvroSchema::NULL_TYPE:
-             return $decoder->read_null();
-         case AvroSchema::BOOLEAN_TYPE:
-             return $decoder->read_boolean();
-         case AvroSchema::INT_TYPE:
-             return $decoder->read_int();
-         case AvroSchema::LONG_TYPE:
-             return $decoder->read_long();
-         case AvroSchema::FLOAT_TYPE:
-             return $decoder->read_float();
-         case AvroSchema::DOUBLE_TYPE:
-             return $decoder->read_double();
-         case AvroSchema::STRING_TYPE:
-             return $decoder->read_string();
-         case AvroSchema::BYTES_TYPE:
-             return $decoder->read_bytes();
-         case AvroSchema::ARRAY_SCHEMA:
-             return $this->read_array($writers_schema, $readers_schema, $decoder);
-         case AvroSchema::MAP_SCHEMA:
-             return $this->read_map($writers_schema, $readers_schema, $decoder);
-         case AvroSchema::UNION_SCHEMA:
-             return $this->read_union($writers_schema, $readers_schema, $decoder);
-         case AvroSchema::ENUM_SCHEMA:
-             return $this->read_enum($writers_schema, $readers_schema, $decoder);
-         case AvroSchema::FIXED_SCHEMA:
-             return $this->read_fixed($writers_schema, $readers_schema, $decoder);
-         case AvroSchema::RECORD_SCHEMA:
-         case AvroSchema::ERROR_SCHEMA:
-         case AvroSchema::REQUEST_SCHEMA:
-             return $this->read_record($writers_schema, $readers_schema, $decoder);
-         default:
-             $message = sprintf(
-                 "Cannot read unknown schema type: %s",
-                 $writers_schema->type()
-             );
-             throw new AvroException($message);
-     }
- }
-
- /**
-  * Reads an array block by block until a block count of zero is read.
-  *
-  * @returns array
-  */
- public function read_array($writers_schema, $readers_schema, $decoder)
- {
-     $items = array();
-
-     for ($remaining = $decoder->read_long(); 0 != $remaining; $remaining = $decoder->read_long()) {
-         if ($remaining < 0) {
-             // A negative count is followed by a block size.
-             $remaining = -$remaining;
-             $block_size = $decoder->read_long(); // Read (and ignore) block size
-         }
-
-         for ($index = 0; $index < $remaining; $index++) {
-             $items[] = $this->read_data(
-                 $writers_schema->items(),
-                 $readers_schema->items(),
-                 $decoder
-             );
-         }
-     }
-
-     return $items;
- }
-
- /**
-  * Reads a map block by block until a pair count of zero is read.
-  *
-  * @returns array map keys to the values read for them
-  */
- public function read_map($writers_schema, $readers_schema, $decoder)
- {
-     $entries = array();
-
-     for ($remaining = $decoder->read_long(); 0 != $remaining; $remaining = $decoder->read_long()) {
-         if ($remaining < 0) {
-             $remaining = -$remaining;
-             // Note: we're not doing anything with block_size other than skipping it
-             $block_size = $decoder->read_long();
-         }
-
-         for ($index = 0; $index < $remaining; $index++) {
-             $key = $decoder->read_string();
-             $entries[$key] = $this->read_data(
-                 $writers_schema->values(),
-                 $readers_schema->values(),
-                 $decoder
-             );
-         }
-     }
-
-     return $entries;
- }
-
- /**
-  * Reads the index of the union branch that was written, then reads the
-  * datum with that branch of the writer's schema.
-  *
-  * @returns mixed
-  */
- public function read_union($writers_schema, $readers_schema, $decoder)
- {
-     $branch = $writers_schema->schema_by_index($decoder->read_long());
-
-     return $this->read_data($branch, $readers_schema, $decoder);
- }
-
- /**
- * @returns string
- */
- public function read_enum($writers_schema, $readers_schema, $decoder)
- {
- $symbol_index = $decoder->read_int();
- $symbol = $writers_schema->symbol_by_index($symbol_index);
- if (!$readers_schema->has_symbol($symbol))
- null; // FIXME: unset wrt schema resolution
- return $symbol;
- }
-
- /**
- * @returns string
- */
- public function read_fixed($writers_schema, $readers_schema, $decoder)
- {
- return $decoder->read($writers_schema->size());
- }
-
- /**
- * @returns array
- */
- public function read_record($writers_schema, $readers_schema, $decoder)
- {
- $readers_fields = $readers_schema->fields_hash();
- $record = array();
- foreach ($writers_schema->fields() as $writers_field)
- {
- $type = $writers_field->type();
- if (isset($readers_fields[$writers_field->name()]))
- $record[$writers_field->name()]
- = $this->read_data($type,
- $readers_fields[$writers_field->name()]->type(),
- $decoder);
- else
- $this->skip_data($type, $decoder);
- }
- // Fill in default values
- if (count($readers_fields) > count($record))
- {
- $writers_fields = $writers_schema->fields_hash();
- foreach ($readers_fields as $field_name => $field)
- {
- if (!isset($writers_fields[$field_name]))
- {
- if ($field->has_default_value())
- $record[$field->name()]
- = $this->read_default_value($field->type(),
- $field->default_value());
- else
- null; // FIXME: unset
- }
- }
- }
-
- return $record;
- }
- /**#@-*/
-
- /**
- * @param AvroSchema $field_schema
- * @param null|boolean|int|float|string|array $default_value
- * @returns null|boolean|int|float|string|array
- *
- * @throws AvroException if $field_schema type is unknown.
- */
- public function read_default_value($field_schema, $default_value)
- {
- switch($field_schema->type())
- {
- case AvroSchema::NULL_TYPE:
- return null;
- case AvroSchema::BOOLEAN_TYPE:
- return $default_value;
- case AvroSchema::INT_TYPE:
- case AvroSchema::LONG_TYPE:
- return (int) $default_value;
- case AvroSchema::FLOAT_TYPE:
- case AvroSchema::DOUBLE_TYPE:
- return (float) $default_value;
- case AvroSchema::STRING_TYPE:
- case AvroSchema::BYTES_TYPE:
- return $default_value;
- case AvroSchema::ARRAY_SCHEMA:
- $array = array();
- foreach ($default_value as $json_val)
- {
- $val = $this->read_default_value($field_schema->items(), $json_val);
- $array []= $val;
- }
- return $array;
- case AvroSchema::MAP_SCHEMA:
- $map = array();
- foreach ($default_value as $key => $json_val)
- $map[$key] = $this->read_default_value($field_schema->values(),
- $json_val);
- return $map;
- case AvroSchema::UNION_SCHEMA:
- return $this->read_default_value($field_schema->schema_by_index(0),
- $default_value);
- case AvroSchema::ENUM_SCHEMA:
- case AvroSchema::FIXED_SCHEMA:
- return $default_value;
- case AvroSchema::RECORD_SCHEMA:
- $record = array();
- foreach ($field_schema->fields() as $field)
- {
- $field_name = $field->name();
- if (!$json_val = $default_value[$field_name])
- $json_val = $field->default_value();
-
- $record[$field_name] = $this->read_default_value($field->type(),
- $json_val);
- }
- return $record;
- default:
- throw new AvroException(sprintf('Unknown type: %s', $field_schema->type()));
- }
- }
-
- /**
- * @param AvroSchema $writers_schema
- * @param AvroIOBinaryDecoder $decoder
- */
- private function skip_data($writers_schema, $decoder)
- {
- switch ($writers_schema->type())
- {
- case AvroSchema::NULL_TYPE:
- return $decoder->skip_null();
- case AvroSchema::BOOLEAN_TYPE:
- return $decoder->skip_boolean();
- case AvroSchema::INT_TYPE:
- return $decoder->skip_int();
- case AvroSchema::LONG_TYPE:
- return $decoder->skip_long();
- case AvroSchema::FLOAT_TYPE:
- return $decoder->skip_float();
- case AvroSchema::DOUBLE_TYPE:
- return $decoder->skip_double();
- case AvroSchema::STRING_TYPE:
- return $decoder->skip_string();
- case AvroSchema::BYTES_TYPE:
- return $decoder->skip_bytes();
- case AvroSchema::ARRAY_SCHEMA:
- return $decoder->skip_array($writers_schema, $decoder);
- case AvroSchema::MAP_SCHEMA:
- return $decoder->skip_map($writers_schema, $decoder);
- case AvroSchema::UNION_SCHEMA:
- return $decoder->skip_union($writers_schema, $decoder);
- case AvroSchema::ENUM_SCHEMA:
- return $decoder->skip_enum($writers_schema, $decoder);
- case AvroSchema::FIXED_SCHEMA:
- return $decoder->skip_fixed($writers_schema, $decoder);
- case AvroSchema::RECORD_SCHEMA:
- case AvroSchema::ERROR_SCHEMA:
- case AvroSchema::REQUEST_SCHEMA:
- return $decoder->skip_record($writers_schema, $decoder);
- default:
- throw new AvroException(sprintf('Unknown schema type: %s',
- $writers_schema->type()));
- }
- }
-}
-
-/**
- * Decodes and reads Avro data from an AvroIO object encoded using
- * Avro binary encoding.
- *
- * @package Avro
- */
-class AvroIOBinaryDecoder
-{
-
- /**
- * @param int[] array of byte ascii values
- * @returns long decoded value
- * @internal Requires 64-bit platform
- */
- public static function decode_long_from_array($bytes)
- {
- $b = array_shift($bytes);
- $n = $b & 0x7f;
- $shift = 7;
- while (0 != ($b & 0x80))
- {
- $b = array_shift($bytes);
- $n |= (($b & 0x7f) << $shift);
- $shift += 7;
- }
- return (($n >> 1) ^ -($n & 1));
- }
-
- /**
- * Performs decoding of the binary string to a float value.
- *
- * XXX: This is <b>not</b> endian-aware! See comments in
- * {@link AvroIOBinaryEncoder::float_to_int_bits()} for details.
- *
- * @param string $bits
- * @returns float
- */
- static public function int_bits_to_float($bits)
- {
- $float = unpack('f', $bits);
- return (float) $float[1];
- }
-
- /**
- * Performs decoding of the binary string to a double value.
- *
- * XXX: This is <b>not</b> endian-aware! See comments in
- * {@link AvroIOBinaryEncoder::float_to_int_bits()} for details.
- *
- * @param string $bits
- * @returns float
- */
- static public function long_bits_to_double($bits)
- {
- $double = unpack('d', $bits);
- return (double) $double[1];
- }
-
- /**
- * @var AvroIO
- */
- private $io;
-
- /**
- * @param AvroIO $io object from which to read.
- */
- public function __construct($io)
- {
- Avro::check_platform();
- $this->io = $io;
- }
-
- /**
- * @returns string the next byte from $this->io.
- * @throws AvroException if the next byte cannot be read.
- */
- private function next_byte() { return $this->read(1); }
-
- /**
- * @returns null
- */
- public function read_null() { return null; }
-
- /**
- * @returns boolean
- */
- public function read_boolean()
- {
- return (boolean) (1 == ord($this->next_byte()));
- }
-
- /**
- * @returns int
- */
- public function read_int() { return (int) $this->read_long(); }
-
- /**
- * @returns long
- */
- public function read_long()
- {
- $byte = ord($this->next_byte());
- $bytes = array($byte);
- while (0 != ($byte & 0x80))
- {
- $byte = ord($this->next_byte());
- $bytes []= $byte;
- }
-
- if (Avro::uses_gmp())
- return AvroGMP::decode_long_from_array($bytes);
-
- return self::decode_long_from_array($bytes);
- }
-
- /**
- * @returns float
- */
- public function read_float()
- {
- return self::int_bits_to_float($this->read(4));
- }
-
- /**
- * @returns double
- */
- public function read_double()
- {
- return self::long_bits_to_double($this->read(8));
- }
-
- /**
- * A string is encoded as a long followed by that many bytes
- * of UTF-8 encoded character data.
- * @returns string
- */
- public function read_string() { return $this->read_bytes(); }
-
- /**
- * @returns string
- */
- public function read_bytes() { return $this->read($this->read_long()); }
-
- /**
- * @param int $len count of bytes to read
- * @returns string
- */
- public function read($len) { return $this->io->read($len); }
-
- public function skip_null() { return null; }
-
- public function skip_boolean() { return $this->skip(1); }
-
- public function skip_int() { return $this->skip_long(); }
-
- public function skip_long()
- {
- $b = $this->next_byte();
- while (0 != (ord($b) & 0x80))
- $b = $this->next_byte();
- }
-
- public function skip_float() { return $this->skip(4); }
-
- public function skip_double() { return $this->skip(8); }
-
- public function skip_bytes() { return $this->skip($this->read_long()); }
-
- public function skip_string() { return $this->skip_bytes(); }
-
- /**
- * @param int $len count of bytes to skip
- * @uses AvroIO::seek()
- */
- public function skip($len) { $this->seek($len, AvroIO::SEEK_CUR); }
-
- /**
- * @returns int position of pointer in AvroIO instance
- * @uses AvroIO::tell()
- */
- private function tell() { return $this->io->tell(); }
-
- /**
- * @param int $offset
- * @param int $whence
- * @returns boolean true upon success
- * @uses AvroIO::seek()
- */
- private function seek($offset, $whence)
- {
- return $this->io->seek($offset, $whence);
- }
-}
-
diff --git a/lang/php/lib/avro/debug.php b/lang/php/lib/avro/debug.php
deleted file mode 100644
index 1ca0ed8..0000000
--- a/lang/php/lib/avro/debug.php
+++ /dev/null
@@ -1,194 +0,0 @@
-<?php
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @package Avro
- */
-
-/**
- * Avro library code debugging functions
- * @package Avro
- */
-class AvroDebug
-{
-
- /**
- * @var int high debug level
- */
- const DEBUG5 = 5;
- /**
- * @var int low debug level
- */
- const DEBUG1 = 1;
- /**
- * @var int current debug level
- */
- const DEBUG_LEVEL = self::DEBUG1;
-
- /**
- * @var int $debug_level
- * @returns boolean true if the given $debug_level is equivalent
- * or more verbose than than the current debug level
- * and false otherwise.
- */
- static function is_debug($debug_level=self::DEBUG1)
- {
- return (self::DEBUG_LEVEL >= $debug_level);
- }
-
- /**
- * @param string $format format string for the given arguments. Passed as is
- * to <code>vprintf</code>.
- * @param array $args array of arguments to pass to vsprinf.
- * @param int $debug_level debug level at which to print this statement
- * @returns boolean true
- */
- static function debug($format, $args, $debug_level=self::DEBUG1)
- {
- if (self::is_debug($debug_level))
- vprintf($format . "\n", $args);
- return true;
- }
-
- /**
- * @param string $str
- * @returns string[] array of hex representation of each byte of $str
- */
- static function hex_array($str) { return self::bytes_array($str); }
-
- /**
- * @param string $str
- * @param string $joiner string used to join
- * @returns string hex-represented bytes of each byte of $str
- joined by $joiner
- */
- static function hex_string($str, $joiner=' ')
- {
- return join($joiner, self::hex_array($str));
- }
-
- /**
- * @param string $str
- * @param string $format format to represent bytes
- * @returns string[] array of each byte of $str formatted using $format
- */
- static function bytes_array($str, $format='x%02x')
- {
- $x = array();
- foreach (str_split($str) as $b)
- $x []= sprintf($format, ord($b));
- return $x;
- }
-
- /**
- * @param string $str
- * @returns string[] array of bytes of $str represented in decimal format ('%3d')
- */
- static function dec_array($str) { return self::bytes_array($str, '%3d'); }
-
- /**
- * @param string $str
- * @param string $joiner string to join bytes of $str
- * @returns string of bytes of $str represented in decimal format
- * @uses dec_array()
- */
- static function dec_string($str, $joiner = ' ')
- {
- return join($joiner, self::dec_array($str));
- }
-
- /**
- * @param string $str
- * @param string $format one of 'ctrl', 'hex', or 'dec' for control,
- hexadecimal, or decimal format for bytes.
- - ctrl: ASCII control characters represented as text.
- For example, the null byte is represented as 'NUL'.
- Visible ASCII characters represent themselves, and
- others are represented as a decimal ('%03d')
- - hex: bytes represented in hexadecimal ('%02X')
- - dec: bytes represented in decimal ('%03d')
- * @returns string[] array of bytes represented in the given format.
- */
- static function ascii_array($str, $format='ctrl')
- {
- if (!in_array($format, array('ctrl', 'hex', 'dec')))
- throw new AvroException('Unrecognized format specifier');
-
- $ctrl_chars = array('NUL', 'SOH', 'STX', 'ETX', 'EOT', 'ENQ', 'ACK', 'BEL',
- 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'SO', 'SI',
- 'DLE', 'DC1', 'DC2', 'DC3', 'DC4', 'NAK', 'SYN', 'ETB',
- 'CAN', 'EM', 'SUB', 'ESC', 'FS', 'GS', 'RS', 'US');
- $x = array();
- foreach (str_split($str) as $b)
- {
- $db = ord($b);
- if ($db < 32)
- {
- switch ($format)
- {
- case 'ctrl':
- $x []= str_pad($ctrl_chars[$db], 3, ' ', STR_PAD_LEFT);
- break;
- case 'hex':
- $x []= sprintf("x%02X", $db);
- break;
- case 'dec':
- $x []= str_pad($db, 3, '0', STR_PAD_LEFT);
- break;
- }
- }
- else if ($db < 127)
- $x []= " $b";
- else if ($db == 127)
- {
- switch ($format)
- {
- case 'ctrl':
- $x []= 'DEL';
- break;
- case 'hex':
- $x []= sprintf("x%02X", $db);
- break;
- case 'dec':
- $x []= str_pad($db, 3, '0', STR_PAD_LEFT);
- break;
- }
- }
- else
- if ('hex' == $format)
- $x []= sprintf("x%02X", $db);
- else
- $x []= str_pad($db, 3, '0', STR_PAD_LEFT);
- }
- return $x;
- }
-
- /**
- * @param string $str
- * @param string $format one of 'ctrl', 'hex', or 'dec'.
- * See {@link self::ascii_array()} for more description
- * @param string $joiner
- * @returns string of bytes joined by $joiner
- * @uses ascii_array()
- */
- static function ascii_string($str, $format='ctrl', $joiner = ' ')
- {
- return join($joiner, self::ascii_array($str, $format));
- }
-}
diff --git a/lang/php/lib/avro/gmp.php b/lang/php/lib/avro/gmp.php
deleted file mode 100644
index 2cf3b58..0000000
--- a/lang/php/lib/avro/gmp.php
+++ /dev/null
@@ -1,222 +0,0 @@
-<?php
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @package Avro
- */
-
-/**
- * Methods for handling 64-bit operations using the GMP extension.
- *
- * This is a naive and hackish implementation that is intended
- * to work well enough to support Avro. It has not been tested
- * beyond what's needed to decode and encode long values.
- *
- * @package Avro
- */
-class AvroGMP {
-
- /**
- * @var resource memoized GMP resource for zero
- */
- private static $gmp_0;
-
- /**
- * @returns resource GMP resource for zero
- */
- private static function gmp_0()
- {
- if (!isset(self::$gmp_0))
- self::$gmp_0 = gmp_init('0');
- return self::$gmp_0;
- }
-
- /**
- * @var resource memoized GMP resource for one (1)
- */
- private static $gmp_1;
-
- /**
- * @returns resource GMP resource for one (1)
- */
- private static function gmp_1()
- {
- if (!isset(self::$gmp_1))
- self::$gmp_1 = gmp_init('1');
- return self::$gmp_1;
- }
-
- /**
- * @var resource memoized GMP resource for two (2)
- */
- private static $gmp_2;
-
- /**
- * @returns resource GMP resource for two (2)
- */
- private static function gmp_2()
- {
- if (!isset(self::$gmp_2))
- self::$gmp_2 = gmp_init('2');
- return self::$gmp_2;
- }
-
- /**
- * @var resource memoized GMP resource for 0x7f
- */
- private static $gmp_0x7f;
-
- /**
- * @returns resource GMP resource for 0x7f
- */
- private static function gmp_0x7f()
- {
- if (!isset(self::$gmp_0x7f))
- self::$gmp_0x7f = gmp_init('0x7f');
- return self::$gmp_0x7f;
- }
-
- /**
- * @var resource memoized GMP resource for 64-bit ~0x7f
- */
- private static $gmp_n0x7f;
-
- /**
- * @returns resource GMP resource for 64-bit ~0x7f
- */
- private static function gmp_n0x7f()
- {
- if (!isset(self::$gmp_n0x7f))
- self::$gmp_n0x7f = gmp_init('0xffffffffffffff80');
- return self::$gmp_n0x7f;
- }
-
- /**
- * @var resource memoized GMP resource for 64-bits of 1
- */
- private static $gmp_0xfs;
-
- /**
- * @returns resource GMP resource for 64-bits of 1
- */
- private static function gmp_0xfs()
- {
- if (!isset(self::$gmp_0xfs))
- self::$gmp_0xfs = gmp_init('0xffffffffffffffff');
- return self::$gmp_0xfs;
- }
-
- /**
- * @param GMP resource
- * @returns GMP resource 64-bit two's complement of input.
- */
- static function gmp_twos_complement($g)
- {
- return gmp_neg(gmp_sub(gmp_pow(self::gmp_2(), 64), $g));
- }
-
- /**
- * @interal Only works up to shift 63 (doesn't wrap bits around).
- * @param resource|int|string $g
- * @param int $shift number of bits to shift left
- * @returns resource $g shifted left
- */
- static function shift_left($g, $shift)
- {
- if (0 == $shift)
- return $g;
-
- if (0 > gmp_sign($g))
- $g = self::gmp_twos_complement($g);
-
- $m = gmp_mul($g, gmp_pow(self::gmp_2(), $shift));
- $m = gmp_and($m, self::gmp_0xfs());
- if (gmp_testbit($m, 63))
- $m = gmp_neg(gmp_add(gmp_and(gmp_com($m), self::gmp_0xfs()),
- self::gmp_1()));
- return $m;
- }
-
- /**
- * Arithmetic right shift
- * @param resource|int|string $g
- * @param int $shift number of bits to shift right
- * @returns resource $g shifted right $shift bits
- */
- static function shift_right($g, $shift)
- {
- if (0 == $shift)
- return $g;
-
- if (0 <= gmp_sign($g))
- $m = gmp_div($g, gmp_pow(self::gmp_2(), $shift));
- else // negative
- {
- $g = gmp_and($g, self::gmp_0xfs());
- $m = gmp_div($g, gmp_pow(self::gmp_2(), $shift));
- $m = gmp_and($m, self::gmp_0xfs());
- for ($i = 63; $i >= (63 - $shift); $i--)
- gmp_setbit($m, $i);
-
- $m = gmp_neg(gmp_add(gmp_and(gmp_com($m), self::gmp_0xfs()),
- self::gmp_1()));
- }
-
- return $m;
- }
-
- /**
- * @param int|str $n integer (or string representation of integer) to encode
- * @return string $bytes of the long $n encoded per the Avro spec
- */
- static function encode_long($n)
- {
- $g = gmp_init($n);
- $g = gmp_xor(self::shift_left($g, 1),
- self::shift_right($g, 63));
- $bytes = '';
- while (0 != gmp_cmp(self::gmp_0(), gmp_and($g, self::gmp_n0x7f())))
- {
- $bytes .= chr(gmp_intval(gmp_and($g, self::gmp_0x7f())) | 0x80);
- $g = self::shift_right($g, 7);
- }
- $bytes .= chr(gmp_intval($g));
- return $bytes;
- }
-
- /**
- * @param int[] $bytes array of ascii codes of bytes to decode
- * @return string represenation of decoded long.
- */
- static function decode_long_from_array($bytes)
- {
- $b = array_shift($bytes);
- $g = gmp_init($b & 0x7f);
- $shift = 7;
- while (0 != ($b & 0x80))
- {
- $b = array_shift($bytes);
- $g = gmp_or($g, self::shift_left(($b & 0x7f), $shift));
- $shift += 7;
- }
- $val = gmp_xor(self::shift_right($g, 1), gmp_neg(gmp_and($g, 1)));
- return gmp_strval($val);
- }
-
-}
diff --git a/lang/php/lib/avro/io.php b/lang/php/lib/avro/io.php
deleted file mode 100644
index b1c38fa..0000000
--- a/lang/php/lib/avro/io.php
+++ /dev/null
@@ -1,494 +0,0 @@
-<?php
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Avro IO object classes
- * @package Avro
- */
-
-/**
- * Exceptions associated with AvroIO instances.
- * @package Avro
- */
-class AvroIOException extends AvroException {}
-
-/**
- * Barebones IO base class to provide common interface for file and string
- * access within the Avro classes.
- *
- * @package Avro
- */
-class AvroIO
-{
-
- /**
- * @var string general read mode
- */
- const READ_MODE = 'r';
- /**
- * @var string general write mode.
- */
- const WRITE_MODE = 'w';
-
- /**
- * @var int set position to current index + $offset bytes
- */
- const SEEK_CUR = SEEK_CUR;
- /**
- * @var int set position equal to $offset bytes
- */
- const SEEK_SET = SEEK_SET;
- /**
- * @var int set position to end of file + $offset bytes
- */
- const SEEK_END = SEEK_END;
-
- /**
- * Read $len bytes from AvroIO instance
- * @var int $len
- * @return string bytes read
- */
- public function read($len)
- {
- throw new AvroNotImplementedException('Not implemented');
- }
-
- /**
- * Append bytes to this buffer. (Nothing more is needed to support Avro.)
- * @param str $arg bytes to write
- * @returns int count of bytes written.
- * @throws AvroIOException if $args is not a string value.
- */
- public function write($arg)
- {
- throw new AvroNotImplementedException('Not implemented');
- }
-
- /**
- * Return byte offset within AvroIO instance
- * @return int
- */
- public function tell()
- {
- throw new AvroNotImplementedException('Not implemented');
- }
-
- /**
- * Set the position indicator. The new position, measured in bytes
- * from the beginning of the file, is obtained by adding $offset to
- * the position specified by $whence.
- *
- * @param int $offset
- * @param int $whence one of AvroIO::SEEK_SET, AvroIO::SEEK_CUR,
- * or Avro::SEEK_END
- * @returns boolean true
- *
- * @throws AvroIOException
- */
- public function seek($offset, $whence=self::SEEK_SET)
- {
- throw new AvroNotImplementedException('Not implemented');
- }
-
- /**
- * Flushes any buffered data to the AvroIO object.
- * @returns boolean true upon success.
- */
- public function flush()
- {
- throw new AvroNotImplementedException('Not implemented');
- }
-
- /**
- * Returns whether or not the current position at the end of this AvroIO
- * instance.
- *
- * Note is_eof() is <b>not</b> like eof in C or feof in PHP:
- * it returns TRUE if the *next* read would be end of file,
- * rather than if the *most recent* read read end of file.
- * @returns boolean true if at the end of file, and false otherwise
- */
- public function is_eof()
- {
- throw new AvroNotImplementedException('Not implemented');
- }
-
- /**
- * Closes this AvroIO instance.
- */
- public function close()
- {
- throw new AvroNotImplementedException('Not implemented');
- }
-
-}
-
-/**
- * AvroIO wrapper for string access
- * @package Avro
- */
-class AvroStringIO extends AvroIO
-{
- /**
- * @var string
- */
- private $string_buffer;
- /**
- * @var int current position in string
- */
- private $current_index;
- /**
- * @var boolean whether or not the string is closed.
- */
- private $is_closed;
-
- /**
- * @param string $str initial value of AvroStringIO buffer. Regardless
- * of the initial value, the pointer is set to the
- * beginning of the buffer.
- * @throws AvroIOException if a non-string value is passed as $str
- */
- public function __construct($str = '')
- {
- $this->is_closed = false;
- $this->string_buffer = '';
- $this->current_index = 0;
-
- if (is_string($str))
- $this->string_buffer .= $str;
- else
- throw new AvroIOException(
- sprintf('constructor argument must be a string: %s', gettype($str)));
- }
-
- /**
- * Append bytes to this buffer.
- * (Nothing more is needed to support Avro.)
- * @param str $arg bytes to write
- * @returns int count of bytes written.
- * @throws AvroIOException if $args is not a string value.
- */
- public function write($arg)
- {
- $this->check_closed();
- if (is_string($arg))
- return $this->append_str($arg);
- throw new AvroIOException(
- sprintf('write argument must be a string: (%s) %s',
- gettype($arg), var_export($arg, true)));
- }
-
- /**
- * @returns string bytes read from buffer
- * @todo test for fencepost errors wrt updating current_index
- */
- public function read($len)
- {
- $this->check_closed();
- $read='';
- for($i=$this->current_index; $i<($this->current_index+$len); $i++)
- $read .= $this->string_buffer[$i];
- if (strlen($read) < $len)
- $this->current_index = $this->length();
- else
- $this->current_index += $len;
- return $read;
- }
-
- /**
- * @returns boolean true if successful
- * @throws AvroIOException if the seek failed.
- */
- public function seek($offset, $whence=self::SEEK_SET)
- {
- if (!is_int($offset))
- throw new AvroIOException('Seek offset must be an integer.');
- // Prevent seeking before BOF
- switch ($whence)
- {
- case self::SEEK_SET:
- if (0 > $offset)
- throw new AvroIOException('Cannot seek before beginning of file.');
- $this->current_index = $offset;
- break;
- case self::SEEK_CUR:
- if (0 > $this->current_index + $whence)
- throw new AvroIOException('Cannot seek before beginning of file.');
- $this->current_index += $offset;
- break;
- case self::SEEK_END:
- if (0 > $this->length() + $offset)
- throw new AvroIOException('Cannot seek before beginning of file.');
- $this->current_index = $this->length() + $offset;
- break;
- default:
- throw new AvroIOException(sprintf('Invalid seek whence %d', $whence));
- }
-
- return true;
- }
-
- /**
- * @returns int
- * @see AvroIO::tell()
- */
- public function tell() { return $this->current_index; }
-
- /**
- * @returns boolean
- * @see AvroIO::is_eof()
- */
- public function is_eof()
- {
- return ($this->current_index >= $this->length());
- }
-
- /**
- * No-op provided for compatibility with AvroIO interface.
- * @returns boolean true
- */
- public function flush() { return true; }
-
- /**
- * Marks this buffer as closed.
- * @returns boolean true
- */
- public function close()
- {
- $this->check_closed();
- $this->is_closed = true;
- return true;
- }
-
- /**
- * @throws AvroIOException if the buffer is closed.
- */
- private function check_closed()
- {
- if ($this->is_closed())
- throw new AvroIOException('Buffer is closed');
- }
-
- /**
- * Appends bytes to this buffer.
- * @param string $str
- * @returns integer count of bytes written.
- */
- private function append_str($str)
- {
- $this->check_closed();
- $this->string_buffer .= $str;
- $len = strlen($str);
- $this->current_index += $len;
- return $len;
- }
-
- /**
- * Truncates the truncate buffer to 0 bytes and returns the pointer
- * to the beginning of the buffer.
- * @returns boolean true
- */
- public function truncate()
- {
- $this->check_closed();
- $this->string_buffer = '';
- $this->current_index = 0;
- return true;
- }
-
- /**
- * @returns int count of bytes in the buffer
- * @internal Could probably memoize length for performance, but
- * no need do this yet.
- */
- public function length() { return strlen($this->string_buffer); }
-
- /**
- * @returns string
- */
- public function __toString() { return $this->string_buffer; }
-
-
- /**
- * @returns string
- * @uses self::__toString()
- */
- public function string() { return $this->__toString(); }
-
- /**
- * @returns boolean true if this buffer is closed and false
- * otherwise.
- */
- public function is_closed() { return $this->is_closed; }
-}
-
-/**
- * AvroIO wrapper for PHP file access functions
- * @package Avro
- */
-class AvroFile extends AvroIO
-{
- /**
- * @var string fopen read mode value. Used internally.
- */
- const FOPEN_READ_MODE = 'rb';
-
- /**
- * @var string fopen write mode value. Used internally.
- */
- const FOPEN_WRITE_MODE = 'wb';
-
- /**
- * @var string
- */
- private $file_path;
-
- /**
- * @var resource file handle for AvroFile instance
- */
- private $file_handle;
-
- public function __construct($file_path, $mode = self::READ_MODE)
- {
- /**
- * XXX: should we check for file existence (in case of reading)
- * or anything else about the provided file_path argument?
- */
- $this->file_path = $file_path;
- switch ($mode)
- {
- case self::WRITE_MODE:
- $this->file_handle = fopen($this->file_path, self::FOPEN_WRITE_MODE);
- if (false == $this->file_handle)
- throw new AvroIOException('Could not open file for writing');
- break;
- case self::READ_MODE:
- $this->file_handle = fopen($this->file_path, self::FOPEN_READ_MODE);
- if (false == $this->file_handle)
- throw new AvroIOException('Could not open file for reading');
- break;
- default:
- throw new AvroIOException(
- sprintf("Only modes '%s' and '%s' allowed. You provided '%s'.",
- self::READ_MODE, self::WRITE_MODE, $mode));
- }
- }
-
- /**
- * @returns int count of bytes written
- * @throws AvroIOException if write failed.
- */
- public function write($str)
- {
- $len = fwrite($this->file_handle, $str);
- if (false === $len)
- throw new AvroIOException(sprintf('Could not write to file'));
- return $len;
- }
-
- /**
- * @param int $len count of bytes to read.
- * @returns string bytes read
- * @throws AvroIOException if length value is negative or if the read failed
- */
- public function read($len)
- {
- if (0 > $len)
- throw new AvroIOException(
- sprintf("Invalid length value passed to read: %d", $len));
-
- if (0 == $len)
- return '';
-
- $bytes = fread($this->file_handle, $len);
- if (false === $bytes)
- throw new AvroIOException('Could not read from file');
- return $bytes;
- }
-
- /**
- * @returns int current position within the file
- * @throws AvroFileExcpetion if tell failed.
- */
- public function tell()
- {
- $position = ftell($this->file_handle);
- if (false === $position)
- throw new AvroIOException('Could not execute tell on reader');
- return $position;
- }
-
- /**
- * @param int $offset
- * @param int $whence
- * @returns boolean true upon success
- * @throws AvroIOException if seek failed.
- * @see AvroIO::seek()
- */
- public function seek($offset, $whence = SEEK_SET)
- {
- $res = fseek($this->file_handle, $offset, $whence);
- // Note: does not catch seeking beyond end of file
- if (-1 === $res)
- throw new AvroIOException(
- sprintf("Could not execute seek (offset = %d, whence = %d)",
- $offset, $whence));
- return true;
- }
-
- /**
- * Closes the file.
- * @returns boolean true if successful.
- * @throws AvroIOException if there was an error closing the file.
- */
- public function close()
- {
- $res = fclose($this->file_handle);
- if (false === $res)
- throw new AvroIOException('Error closing file.');
- return $res;
- }
-
- /**
- * @returns boolean true if the pointer is at the end of the file,
- * and false otherwise.
- * @see AvroIO::is_eof() as behavior differs from feof()
- */
- public function is_eof()
- {
- $this->read(1);
- if (feof($this->file_handle))
- return true;
- $this->seek(-1, self::SEEK_CUR);
- return false;
- }
-
- /**
- * @returns boolean true if the flush was successful.
- * @throws AvroIOException if there was an error flushing the file.
- */
- public function flush()
- {
- $res = fflush($this->file_handle);
- if (false === $res)
- throw new AvroIOException('Could not flush file.');
- return true;
- }
-
-}
diff --git a/lang/php/lib/avro/protocol.php b/lang/php/lib/avro/protocol.php
deleted file mode 100644
index 543dd5e..0000000
--- a/lang/php/lib/avro/protocol.php
+++ /dev/null
@@ -1,87 +0,0 @@
-<?php
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @package Avro
- */
-
-/**
- * Avro library for protocols
- * @package Avro
- */
-class AvroProtocol
-{
- public $name;
- public $namespace;
- public $schemata;
- public $messages;
-
- public static function parse($json)
- {
- if (is_null($json))
- throw new AvroProtocolParseException( "Protocol can't be null");
-
- $protocol = new AvroProtocol();
- $protocol->real_parse(json_decode($json, true));
- return $protocol;
- }
-
- function real_parse($avro) {
- $this->protocol = $avro["protocol"];
- $this->namespace = $avro["namespace"];
- $this->schemata = new AvroNamedSchemata();
- $this->name = $avro["protocol"];
-
- if (!is_null($avro["types"])) {
- $types = AvroSchema::real_parse($avro["types"], $this->namespace, $this->schemata);
- }
-
- if (!is_null($avro["messages"])) {
- foreach ($avro["messages"] as $messageName => $messageAvro) {
- $message = new AvroProtocolMessage($messageName, $messageAvro, $this);
- $this->messages[$messageName] = $message;
- }
- }
- }
-}
-
-class AvroProtocolMessage
-{
- /**
- * @var AvroRecordSchema $request
- */
-
- public $request;
-
- public $response;
-
- public function __construct($name, $avro, $protocol)
- {
- $this->name = $name;
- $this->request = new AvroRecordSchema(new AvroName($name, null, $protocol->namespace), null, $avro['request'], $protocol->schemata, AvroSchema::REQUEST_SCHEMA);
-
- if (array_key_exists('response', $avro)) {
- $this->response = $protocol->schemata->schema_by_name(new AvroName($avro['response'], $protocol->namespace, $protocol->namespace));
- if ($this->response == null)
- $this->response = new AvroPrimitiveSchema($avro['response']);
- }
- }
-}
-
-class AvroProtocolParseException extends AvroException {};
diff --git a/lang/php/lib/avro/schema.php b/lang/php/lib/avro/schema.php
deleted file mode 100644
index 907b0e9..0000000
--- a/lang/php/lib/avro/schema.php
+++ /dev/null
@@ -1,1457 +0,0 @@
-<?php
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Avro Schema and and Avro Schema support classes.
- * @package Avro
- */
-
-/** TODO
- * - ARRAY have only type and item attributes (what about metadata?)
- * - MAP keys are (assumed?) to be strings
- * - FIXED size must be integer (must be positive? less than MAXINT?)
- * - primitive type names cannot have a namespace (so throw an error? or ignore?)
- * - schema may contain multiple definitions of a named schema
- * if definitions are equivalent (?)
- * - Cleanup default namespace and named schemata handling.
- * - For one, it appears to be *too* global. According to the spec,
- * we should only be referencing schemas that are named within the
- * *enclosing* schema, so those in sibling schemas (say, unions or fields)
- * shouldn't be referenced, if I understand the spec correctly.
- * - Also, if a named schema is defined more than once in the same schema,
- * it must have the same definition: so it appears we *do* need to keep
- * track of named schemata globally as well. (And does this play well
- * with the requirements regarding enclosing schema?
- * - default values for bytes and fixed fields are JSON strings,
- * where unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255
- * - make sure other default values for other schema are of appropriate type
- * - Should AvroField really be an AvroSchema object? Avro Fields have a name
- * attribute, but not a namespace attribute (and the name can't be namespace
- * qualified). It also has additional attributes such as doc, which named schemas
- * enum and record have (though not fixed schemas, which also have names), and
- * fields also have default and order attributes, shared by no other schema type.
- */
-
-/**
- * Exceptions associated with parsing JSON schema represenations
- * @package Avro
- */
-class AvroSchemaParseException extends AvroException {};
-
-/**
- * @package Avro
- */
-class AvroSchema
-{
- /**
- * @var int lower bound of integer values: -(1 << 31)
- */
- const INT_MIN_VALUE = -2147483648;
-
- /**
- * @var int upper bound of integer values: (1 << 31) - 1
- */
- const INT_MAX_VALUE = 2147483647;
-
- /**
- * @var long lower bound of long values: -(1 << 63)
- */
- const LONG_MIN_VALUE = -9223372036854775808;
-
- /**
- * @var long upper bound of long values: (1 << 63) - 1
- */
- const LONG_MAX_VALUE = 9223372036854775807;
-
- /**
- * @var string null schema type name
- */
- const NULL_TYPE = 'null';
-
- /**
- * @var string boolean schema type name
- */
- const BOOLEAN_TYPE = 'boolean';
-
- /**
- * int schema type value is a 32-bit signed int
- * @var string int schema type name.
- */
- const INT_TYPE = 'int';
-
- /**
- * long schema type value is a 64-bit signed int
- * @var string long schema type name
- */
- const LONG_TYPE = 'long';
-
- /**
- * float schema type value is a 32-bit IEEE 754 floating-point number
- * @var string float schema type name
- */
- const FLOAT_TYPE = 'float';
-
- /**
- * double schema type value is a 64-bit IEEE 754 floating-point number
- * @var string double schema type name
- */
- const DOUBLE_TYPE = 'double';
-
- /**
- * string schema type value is a Unicode character sequence
- * @var string string schema type name
- */
- const STRING_TYPE = 'string';
-
- /**
- * bytes schema type value is a sequence of 8-bit unsigned bytes
- * @var string bytes schema type name
- */
- const BYTES_TYPE = 'bytes';
-
- // Complex Types
- // Unnamed Schema
- /**
- * @var string array schema type name
- */
- const ARRAY_SCHEMA = 'array';
-
- /**
- * @var string map schema type name
- */
- const MAP_SCHEMA = 'map';
-
- /**
- * @var string union schema type name
- */
- const UNION_SCHEMA = 'union';
-
- /**
- * Unions of error schemas are used by Avro messages
- * @var string error_union schema type name
- */
- const ERROR_UNION_SCHEMA = 'error_union';
-
- // Named Schema
-
- /**
- * @var string enum schema type name
- */
- const ENUM_SCHEMA = 'enum';
-
- /**
- * @var string fixed schema type name
- */
- const FIXED_SCHEMA = 'fixed';
-
- /**
- * @var string record schema type name
- */
- const RECORD_SCHEMA = 'record';
- // Other Schema
-
- /**
- * @var string error schema type name
- */
- const ERROR_SCHEMA = 'error';
-
- /**
- * @var string request schema type name
- */
- const REQUEST_SCHEMA = 'request';
-
-
- // Schema attribute names
- /**
- * @var string schema type name attribute name
- */
- const TYPE_ATTR = 'type';
-
- /**
- * @var string named schema name attribute name
- */
- const NAME_ATTR = 'name';
-
- /**
- * @var string named schema namespace attribute name
- */
- const NAMESPACE_ATTR = 'namespace';
-
- /**
- * @var string derived attribute: doesn't appear in schema
- */
- const FULLNAME_ATTR = 'fullname';
-
- /**
- * @var string array schema size attribute name
- */
- const SIZE_ATTR = 'size';
-
- /**
- * @var string record fields attribute name
- */
- const FIELDS_ATTR = 'fields';
-
- /**
- * @var string array schema items attribute name
- */
- const ITEMS_ATTR = 'items';
-
- /**
- * @var string enum schema symbols attribute name
- */
- const SYMBOLS_ATTR = 'symbols';
-
- /**
- * @var string map schema values attribute name
- */
- const VALUES_ATTR = 'values';
-
- /**
- * @var string document string attribute name
- */
- const DOC_ATTR = 'doc';
-
- /**
- * @var array list of primitive schema type names
- */
- private static $primitive_types = array(self::NULL_TYPE, self::BOOLEAN_TYPE,
- self::STRING_TYPE, self::BYTES_TYPE,
- self::INT_TYPE, self::LONG_TYPE,
- self::FLOAT_TYPE, self::DOUBLE_TYPE);
-
- /**
- * @var array list of named schema type names
- */
- private static $named_types = array(self::FIXED_SCHEMA, self::ENUM_SCHEMA,
- self::RECORD_SCHEMA, self::ERROR_SCHEMA);
-
- /**
- * @param string $type a schema type name
- * @returns boolean true if the given type name is a named schema type name
- * and false otherwise.
- */
- public static function is_named_type($type)
- {
- return in_array($type, self::$named_types);
- }
-
- /**
- * @param string $type a schema type name
- * @returns boolean true if the given type name is a primitive schema type
- * name and false otherwise.
- */
- public static function is_primitive_type($type)
- {
- return in_array($type, self::$primitive_types);
- }
-
- /**
- * @param string $type a schema type name
- * @returns boolean true if the given type name is a valid schema type
- * name and false otherwise.
- */
- public static function is_valid_type($type)
- {
- return (self::is_primitive_type($type)
- || self::is_named_type($type)
- || in_array($type, array(self::ARRAY_SCHEMA,
- self::MAP_SCHEMA,
- self::UNION_SCHEMA,
- self::REQUEST_SCHEMA,
- self::ERROR_UNION_SCHEMA)));
- }
-
- /**
- * @var array list of names of reserved attributes
- */
- private static $reserved_attrs = array(self::TYPE_ATTR,
- self::NAME_ATTR,
- self::NAMESPACE_ATTR,
- self::FIELDS_ATTR,
- self::ITEMS_ATTR,
- self::SIZE_ATTR,
- self::SYMBOLS_ATTR,
- self::VALUES_ATTR);
-
- /**
- * @param string $json JSON-encoded schema
- * @uses self::real_parse()
- * @returns AvroSchema
- */
- public static function parse($json)
- {
- $schemata = new AvroNamedSchemata();
- return self::real_parse(json_decode($json, true), null, $schemata);
- }
-
- /**
- * @param mixed $avro JSON-decoded schema
- * @param string $default_namespace namespace of enclosing schema
- * @param AvroNamedSchemata &$schemata reference to named schemas
- * @returns AvroSchema
- * @throws AvroSchemaParseException
- */
- static function real_parse($avro, $default_namespace=null, &$schemata=null)
- {
- if (is_null($schemata))
- $schemata = new AvroNamedSchemata();
-
- if (is_array($avro))
- {
- $type = AvroUtil::array_value($avro, self::TYPE_ATTR);
-
- if (self::is_primitive_type($type))
- return new AvroPrimitiveSchema($type);
-
- elseif (self::is_named_type($type))
- {
- $name = AvroUtil::array_value($avro, self::NAME_ATTR);
- $namespace = AvroUtil::array_value($avro, self::NAMESPACE_ATTR);
- $new_name = new AvroName($name, $namespace, $default_namespace);
- $doc = AvroUtil::array_value($avro, self::DOC_ATTR);
- switch ($type)
- {
- case self::FIXED_SCHEMA:
- $size = AvroUtil::array_value($avro, self::SIZE_ATTR);
- return new AvroFixedSchema($new_name, $doc,
- $size,
- $schemata);
- case self::ENUM_SCHEMA:
- $symbols = AvroUtil::array_value($avro, self::SYMBOLS_ATTR);
- return new AvroEnumSchema($new_name, $doc,
- $symbols,
- $schemata);
- case self::RECORD_SCHEMA:
- case self::ERROR_SCHEMA:
- $fields = AvroUtil::array_value($avro, self::FIELDS_ATTR);
- return new AvroRecordSchema($new_name, $doc,
- $fields,
- $schemata, $type);
- default:
- throw new AvroSchemaParseException(
- sprintf('Unknown named type: %s', $type));
- }
- }
- elseif (self::is_valid_type($type))
- {
- switch ($type)
- {
- case self::ARRAY_SCHEMA:
- return new AvroArraySchema($avro[self::ITEMS_ATTR],
- $default_namespace,
- $schemata);
- case self::MAP_SCHEMA:
- return new AvroMapSchema($avro[self::VALUES_ATTR],
- $default_namespace,
- $schemata);
- default:
- throw new AvroSchemaParseException(
- sprintf('Unknown valid type: %s', $type));
- }
- }
- elseif (!array_key_exists(self::TYPE_ATTR, $avro)
- && AvroUtil::is_list($avro))
- return new AvroUnionSchema($avro, $default_namespace, $schemata);
- else
- throw new AvroSchemaParseException(sprintf('Undefined type: %s',
- $type));
- }
- elseif (self::is_primitive_type($avro))
- return new AvroPrimitiveSchema($avro);
- else
- throw new AvroSchemaParseException(
- sprintf('%s is not a schema we know about.',
- print_r($avro, true)));
- }
-
- /**
- * @returns boolean true if $datum is valid for $expected_schema
- * and false otherwise.
- * @throws AvroSchemaParseException
- */
- public static function is_valid_datum($expected_schema, $datum)
- {
- switch($expected_schema->type)
- {
- case self::NULL_TYPE:
- return is_null($datum);
- case self::BOOLEAN_TYPE:
- return is_bool($datum);
- case self::STRING_TYPE:
- case self::BYTES_TYPE:
- return is_string($datum);
- case self::INT_TYPE:
- return (is_int($datum)
- && (self::INT_MIN_VALUE <= $datum)
- && ($datum <= self::INT_MAX_VALUE));
- case self::LONG_TYPE:
- return (is_int($datum)
- && (self::LONG_MIN_VALUE <= $datum)
- && ($datum <= self::LONG_MAX_VALUE));
- case self::FLOAT_TYPE:
- case self::DOUBLE_TYPE:
- return (is_float($datum) || is_int($datum));
- case self::ARRAY_SCHEMA:
- if (is_array($datum))
- {
- foreach ($datum as $d)
- if (!self::is_valid_datum($expected_schema->items(), $d))
- return false;
- return true;
- }
- return false;
- case self::MAP_SCHEMA:
- if (is_array($datum))
- {
- foreach ($datum as $k => $v)
- if (!is_string($k)
- || !self::is_valid_datum($expected_schema->values(), $v))
- return false;
- return true;
- }
- return false;
- case self::UNION_SCHEMA:
- foreach ($expected_schema->schemas() as $schema)
- if (self::is_valid_datum($schema, $datum))
- return true;
- return false;
- case self::ENUM_SCHEMA:
- return in_array($datum, $expected_schema->symbols());
- case self::FIXED_SCHEMA:
- return (is_string($datum)
- && (strlen($datum) == $expected_schema->size()));
- case self::RECORD_SCHEMA:
- case self::ERROR_SCHEMA:
- case self::REQUEST_SCHEMA:
- if (is_array($datum))
- {
- foreach ($expected_schema->fields() as $field)
- if (!array_key_exists($field->name(), $datum) || !self::is_valid_datum($field->type(), $datum[$field->name()]))
- return false;
- return true;
- }
- return false;
- default:
- throw new AvroSchemaParseException(
- sprintf('%s is not allowed.', $expected_schema));
- }
- }
-
- /**
- * @internal Should only be called from within the constructor of
- * a class which extends AvroSchema
- * @param string $type a schema type name
- */
- public function __construct($type)
- {
- $this->type = $type;
- }
-
- /**
- * @param mixed $avro
- * @param string $default_namespace namespace of enclosing schema
- * @param AvroNamedSchemata &$schemata
- * @returns AvroSchema
- * @uses AvroSchema::real_parse()
- * @throws AvroSchemaParseException
- */
- protected static function subparse($avro, $default_namespace, &$schemata=null)
- {
- try
- {
- return self::real_parse($avro, $default_namespace, $schemata);
- }
- catch (AvroSchemaParseException $e)
- {
- throw $e;
- }
- catch (Exception $e)
- {
- throw new AvroSchemaParseException(
- sprintf('Sub-schema is not a valid Avro schema. Bad schema: %s',
- print_r($avro, true)));
- }
-
- }
-
- /**
- * @returns string schema type name of this schema
- */
- public function type() { return $this->type; }
-
- /**
- * @returns mixed
- */
- public function to_avro()
- {
- return array(self::TYPE_ATTR => $this->type);
- }
-
- /**
- * @returns string the JSON-encoded representation of this Avro schema.
- */
- public function __toString() { return json_encode($this->to_avro()); }
-
- /**
- * @returns mixed value of the attribute with the given attribute name
- */
- public function attribute($attribute) { return $this->$attribute(); }
-
-}
-
-/**
- * Avro schema for basic types such as null, int, long, string.
- * @package Avro
- */
-class AvroPrimitiveSchema extends AvroSchema
-{
-
- /**
- * @param string $type the primitive schema type name
- * @throws AvroSchemaParseException if the given $type is not a
- * primitive schema type name
- */
- public function __construct($type)
- {
- if (self::is_primitive_type($type))
- return parent::__construct($type);
- throw new AvroSchemaParseException(
- sprintf('%s is not a valid primitive type.', $type));
- }
-
- /**
- * @returns mixed
- */
- public function to_avro()
- {
- $avro = parent::to_avro();
- // FIXME: Is this if really necessary? When *wouldn't* this be the case?
- if (1 == count($avro))
- return $this->type;
- return $avro;
- }
-}
-
-/**
- * Avro array schema, consisting of items of a particular
- * Avro schema type.
- * @package Avro
- */
-class AvroArraySchema extends AvroSchema
-{
- /**
- * @var AvroName|AvroSchema named schema name or AvroSchema of
- * array element
- */
- private $items;
-
- /**
- * @var boolean true if the items schema
- * FIXME: couldn't we derive this from whether or not $this->items
- * is an AvroName or an AvroSchema?
- */
- private $is_items_schema_from_schemata;
-
- /**
- * @param string|mixed $items AvroNamedSchema name or object form
- * of decoded JSON schema representation.
- * @param string $default_namespace namespace of enclosing schema
- * @param AvroNamedSchemata &$schemata
- */
- public function __construct($items, $default_namespace, &$schemata=null)
- {
- parent::__construct(AvroSchema::ARRAY_SCHEMA);
-
- $this->is_items_schema_from_schemata = false;
- $items_schema = null;
- if (is_string($items)
- && $items_schema = $schemata->schema_by_name(
- new AvroName($items, null, $default_namespace)))
- $this->is_items_schema_from_schemata = true;
- else
- $items_schema = AvroSchema::subparse($items, $default_namespace, $schemata);
-
- $this->items = $items_schema;
- }
-
-
- /**
- * @returns AvroName|AvroSchema named schema name or AvroSchema
- * of this array schema's elements.
- */
- public function items() { return $this->items; }
-
- /**
- * @returns mixed
- */
- public function to_avro()
- {
- $avro = parent::to_avro();
- $avro[AvroSchema::ITEMS_ATTR] = $this->is_items_schema_from_schemata
- ? $this->items->qualified_name() : $this->items->to_avro();
- return $avro;
- }
-}
-
-/**
- * Avro map schema consisting of named values of defined
- * Avro Schema types.
- * @package Avro
- */
-class AvroMapSchema extends AvroSchema
-{
- /**
- * @var string|AvroSchema named schema name or AvroSchema
- * of map schema values.
- */
- private $values;
-
- /**
- * @var boolean true if the named schema
- * XXX Couldn't we derive this based on whether or not
- * $this->values is a string?
- */
- private $is_values_schema_from_schemata;
-
- /**
- * @param string|AvroSchema $values
- * @param string $default_namespace namespace of enclosing schema
- * @param AvroNamedSchemata &$schemata
- */
- public function __construct($values, $default_namespace, &$schemata=null)
- {
- parent::__construct(AvroSchema::MAP_SCHEMA);
-
- $this->is_values_schema_from_schemata = false;
- $values_schema = null;
- if (is_string($values)
- && $values_schema = $schemata->schema_by_name(
- new AvroName($values, null, $default_namespace)))
- $this->is_values_schema_from_schemata = true;
- else
- $values_schema = AvroSchema::subparse($values, $default_namespace,
- $schemata);
-
- $this->values = $values_schema;
- }
-
- /**
- * @returns XXX|AvroSchema
- */
- public function values() { return $this->values; }
-
- /**
- * @returns mixed
- */
- public function to_avro()
- {
- $avro = parent::to_avro();
- $avro[AvroSchema::VALUES_ATTR] = $this->is_values_schema_from_schemata
- ? $this->values->qualified_name() : $this->values->to_avro();
- return $avro;
- }
-}
-
-/**
- * Union of Avro schemas, of which values can be of any of the schema in
- * the union.
- * @package Avro
- */
-class AvroUnionSchema extends AvroSchema
-{
- /**
- * @var AvroSchema[] list of schemas of this union
- */
- private $schemas;
-
- /**
- * @var int[] list of indices of named schemas which
- * are defined in $schemata
- */
- public $schema_from_schemata_indices;
-
- /**
- * @param AvroSchema[] $schemas list of schemas in the union
- * @param string $default_namespace namespace of enclosing schema
- * @param AvroNamedSchemata &$schemata
- */
- public function __construct($schemas, $default_namespace, &$schemata=null)
- {
- parent::__construct(AvroSchema::UNION_SCHEMA);
-
- $this->schema_from_schemata_indices = array();
- $schema_types = array();
- foreach ($schemas as $index => $schema)
- {
- $is_schema_from_schemata = false;
- $new_schema = null;
- if (is_string($schema)
- && ($new_schema = $schemata->schema_by_name(
- new AvroName($schema, null, $default_namespace))))
- $is_schema_from_schemata = true;
- else
- $new_schema = self::subparse($schema, $default_namespace, $schemata);
-
- $schema_type = $new_schema->type;
- if (self::is_valid_type($schema_type)
- && !self::is_named_type($schema_type)
- && in_array($schema_type, $schema_types))
- throw new AvroSchemaParseException(
- sprintf('"%s" is already in union', $schema_type));
- elseif (AvroSchema::UNION_SCHEMA == $schema_type)
- throw new AvroSchemaParseException('Unions cannot contain other unions');
- else
- {
- $schema_types []= $schema_type;
- $this->schemas []= $new_schema;
- if ($is_schema_from_schemata)
- $this->schema_from_schemata_indices []= $index;
- }
- }
-
- }
-
- /**
- * @returns AvroSchema[]
- */
- public function schemas() { return $this->schemas; }
-
- /**
- * @returns AvroSchema the particular schema from the union for
- * the given (zero-based) index.
- * @throws AvroSchemaParseException if the index is invalid for this schema.
- */
- public function schema_by_index($index)
- {
- if (count($this->schemas) > $index)
- return $this->schemas[$index];
-
- throw new AvroSchemaParseException('Invalid union schema index');
- }
-
- /**
- * @returns mixed
- */
- public function to_avro()
- {
- $avro = array();
-
- foreach ($this->schemas as $index => $schema)
- $avro []= (in_array($index, $this->schema_from_schemata_indices))
- ? $schema->qualified_name() : $schema->to_avro();
-
- return $avro;
- }
-}
-
-/**
- * Parent class of named Avro schema
- * @package Avro
- * @todo Refactor AvroNamedSchema to use an AvroName instance
- * to store name information.
- */
-class AvroNamedSchema extends AvroSchema
-{
- /**
- * @var AvroName $name
- */
- private $name;
-
- /**
- * @var string documentation string
- */
- private $doc;
-
- /**
- * @param string $type
- * @param AvroName $name
- * @param string $doc documentation string
- * @param AvroNamedSchemata &$schemata
- * @throws AvroSchemaParseException
- */
- public function __construct($type, $name, $doc=null, &$schemata=null)
- {
- parent::__construct($type);
- $this->name = $name;
-
- if ($doc && !is_string($doc))
- throw new AvroSchemaParseException('Schema doc attribute must be a string');
- $this->doc = $doc;
-
- if (!is_null($schemata))
- $schemata = $schemata->clone_with_new_schema($this);
- }
-
- /**
- * @returns mixed
- */
- public function to_avro()
- {
- $avro = parent::to_avro();
- list($name, $namespace) = AvroName::extract_namespace($this->qualified_name());
- $avro[AvroSchema::NAME_ATTR] = $name;
- if ($namespace)
- $avro[AvroSchema::NAMESPACE_ATTR] = $namespace;
- if (!is_null($this->doc))
- $avro[AvroSchema::DOC_ATTR] = $this->doc;
- return $avro;
- }
-
- /**
- * @returns string
- */
- public function fullname() { return $this->name->fullname(); }
-
- public function qualified_name() { return $this->name->qualified_name(); }
-
-}
-
-/**
- * @package Avro
- */
-class AvroName
-{
- /**
- * @var string character used to separate names comprising the fullname
- */
- const NAME_SEPARATOR = '.';
-
- /**
- * @var string regular expression to validate name values
- */
- const NAME_REGEXP = '/^[A-Za-z_][A-Za-z0-9_]*$/';
-
- /**
- * @returns string[] array($name, $namespace)
- */
- public static function extract_namespace($name, $namespace=null)
- {
- $parts = explode(self::NAME_SEPARATOR, $name);
- if (count($parts) > 1)
- {
- $name = array_pop($parts);
- $namespace = join(self::NAME_SEPARATOR, $parts);
- }
- return array($name, $namespace);
- }
-
- /**
- * @returns boolean true if the given name is well-formed
- * (is a non-null, non-empty string) and false otherwise
- */
- public static function is_well_formed_name($name)
- {
- return (is_string($name) && !empty($name)
- && preg_match(self::NAME_REGEXP, $name));
- }
-
- /**
- * @param string $namespace
- * @returns boolean true if namespace is composed of valid names
- * @throws AvroSchemaParseException if any of the namespace components
- * are invalid.
- */
- private static function check_namespace_names($namespace)
- {
- foreach (explode(self::NAME_SEPARATOR, $namespace) as $n)
- {
- if (empty($n) || (0 == preg_match(self::NAME_REGEXP, $n)))
- throw new AvroSchemaParseException(sprintf('Invalid name "%s"', $n));
- }
- return true;
- }
-
- /**
- * @param string $name
- * @param string $namespace
- * @returns string
- * @throws AvroSchemaParseException if any of the names are not valid.
- */
- private static function parse_fullname($name, $namespace)
- {
- if (!is_string($namespace) || empty($namespace))
- throw new AvroSchemaParseException('Namespace must be a non-empty string.');
- self::check_namespace_names($namespace);
- return $namespace . '.' . $name;
- }
-
- /**
- * @var string valid names are matched by self::NAME_REGEXP
- */
- private $name;
-
- /**
- * @var string
- */
- private $namespace;
-
- /**
- * @var string
- */
- private $fullname;
-
- /**
- * @var string Name qualified as necessary given its default namespace.
- */
- private $qualified_name;
-
- /**
- * @param string $name
- * @param string $namespace
- * @param string $default_namespace
- */
- public function __construct($name, $namespace, $default_namespace)
- {
- if (!is_string($name) || empty($name))
- throw new AvroSchemaParseException('Name must be a non-empty string.');
-
- if (strpos($name, self::NAME_SEPARATOR)
- && self::check_namespace_names($name))
- $this->fullname = $name;
- elseif (0 == preg_match(self::NAME_REGEXP, $name))
- throw new AvroSchemaParseException(sprintf('Invalid name "%s"', $name));
- elseif (!is_null($namespace))
- $this->fullname = self::parse_fullname($name, $namespace);
- elseif (!is_null($default_namespace))
- $this->fullname = self::parse_fullname($name, $default_namespace);
- else
- $this->fullname = $name;
-
- list($this->name, $this->namespace) = self::extract_namespace($this->fullname);
- $this->qualified_name = (is_null($this->namespace)
- || $this->namespace == $default_namespace)
- ? $this->name : $this->fullname;
- }
-
- /**
- * @returns array array($name, $namespace)
- */
- public function name_and_namespace()
- {
- return array($this->name, $this->namespace);
- }
-
- /**
- * @returns string
- */
- public function fullname() { return $this->fullname; }
-
- /**
- * @returns string fullname
- * @uses $this->fullname()
- */
- public function __toString() { return $this->fullname(); }
-
- /**
- * @returns string name qualified for its context
- */
- public function qualified_name() { return $this->qualified_name; }
-
-}
-
-/**
- * Keeps track of AvroNamedSchema which have been observed so far,
- * as well as the default namespace.
- *
- * @package Avro
- */
-class AvroNamedSchemata
-{
- /**
- * @var AvroNamedSchema[]
- */
- private $schemata;
-
- /**
- * @param AvroNamedSchemata[]
- */
- public function __construct($schemata=array())
- {
- $this->schemata = $schemata;
- }
-
- public function list_schemas() {
- var_export($this->schemata);
- foreach($this->schemata as $sch)
- print('Schema '.$sch->__toString()."\n");
- }
-
- /**
- * @param string $fullname
- * @returns boolean true if there exists a schema with the given name
- * and false otherwise.
- */
- public function has_name($fullname)
- {
- return array_key_exists($fullname, $this->schemata);
- }
-
- /**
- * @param string $fullname
- * @returns AvroSchema|null the schema which has the given name,
- * or null if there is no schema with the given name.
- */
- public function schema($fullname)
- {
- if (isset($this->schemata[$fullname]))
- return $this->schemata[$fullname];
- return null;
- }
-
- /**
- * @param AvroName $name
- * @returns AvroSchema|null
- */
- public function schema_by_name($name)
- {
- return $this->schema($name->fullname());
- }
-
- /**
- * Creates a new AvroNamedSchemata instance of this schemata instance
- * with the given $schema appended.
- * @param AvroNamedSchema schema to add to this existing schemata
- * @returns AvroNamedSchemata
- */
- public function clone_with_new_schema($schema)
- {
- $name = $schema->fullname();
- if (AvroSchema::is_valid_type($name))
- throw new AvroSchemaParseException(
- sprintf('Name "%s" is a reserved type name', $name));
- else if ($this->has_name($name))
- throw new AvroSchemaParseException(
- sprintf('Name "%s" is already in use', $name));
- $schemata = new AvroNamedSchemata($this->schemata);
- $schemata->schemata[$name] = $schema;
- return $schemata;
- }
-}
-
-/**
- * @package Avro
- */
-class AvroEnumSchema extends AvroNamedSchema
-{
- /**
- * @var string[] array of symbols
- */
- private $symbols;
-
- /**
- * @param AvroName $name
- * @param string $doc
- * @param string[] $symbols
- * @param AvroNamedSchemata &$schemata
- * @throws AvroSchemaParseException
- */
- public function __construct($name, $doc, $symbols, &$schemata=null)
- {
- if (!AvroUtil::is_list($symbols))
- throw new AvroSchemaParseException('Enum Schema symbols are not a list');
-
- if (count(array_unique($symbols)) > count($symbols))
- throw new AvroSchemaParseException(
- sprintf('Duplicate symbols: %s', $symbols));
-
- foreach ($symbols as $symbol)
- if (!is_string($symbol) || empty($symbol))
- throw new AvroSchemaParseException(
- sprintf('Enum schema symbol must be a string %',
- print_r($symbol, true)));
-
- parent::__construct(AvroSchema::ENUM_SCHEMA, $name, $doc, $schemata);
- $this->symbols = $symbols;
- }
-
- /**
- * @returns string[] this enum schema's symbols
- */
- public function symbols() { return $this->symbols; }
-
- /**
- * @param string $symbol
- * @returns boolean true if the given symbol exists in this
- * enum schema and false otherwise
- */
- public function has_symbol($symbol)
- {
- return in_array($symbol, $this->symbols);
- }
-
- /**
- * @param int $index
- * @returns string enum schema symbol with the given (zero-based) index
- */
- public function symbol_by_index($index)
- {
- if (array_key_exists($index, $this->symbols))
- return $this->symbols[$index];
- throw new AvroException(sprintf('Invalid symbol index %d', $index));
- }
-
- /**
- * @param string $symbol
- * @returns int the index of the given $symbol in the enum schema
- */
- public function symbol_index($symbol)
- {
- $idx = array_search($symbol, $this->symbols, true);
- if (false !== $idx)
- return $idx;
- throw new AvroException(sprintf("Invalid symbol value '%s'", $symbol));
- }
-
- /**
- * @returns mixed
- */
- public function to_avro()
- {
- $avro = parent::to_avro();
- $avro[AvroSchema::SYMBOLS_ATTR] = $this->symbols;
- return $avro;
- }
-}
-
-/**
- * AvroNamedSchema with fixed-length data values
- * @package Avro
- */
-class AvroFixedSchema extends AvroNamedSchema
-{
-
-  /**
-   * @var int byte count of this fixed schema data value
-   */
-  private $size;
-
-  /**
-   * Validates $size, forwards the remaining arguments to the parent
-   * constructor and stores the size.
-   *
-   * @param AvroName $name
-   * @param string $doc ignored: null is always passed to the parent, as
-   *                    fixed schemas don't have doc strings
-   * @param int $size byte count of this fixed schema data value
-   * @param AvroNamedSchemata &$schemata
-   * @throws AvroSchemaParseException if $size is not an integer
-   */
-  public function __construct($name, $doc, $size, &$schemata=null)
-  {
-    if (!is_int($size)) {
-      throw new AvroSchemaParseException(
-        'Fixed Schema requires a valid integer for "size" attribute');
-    }
-
-    // Fixed schemas don't have doc strings, so the caller's $doc is dropped.
-    parent::__construct(AvroSchema::FIXED_SCHEMA, $name, null, $schemata);
-
-    // Plain assignment: a constructor's return value is discarded by `new`.
-    $this->size = $size;
-  }
-
-  /**
-   * @returns int byte count of this fixed schema data value
-   */
-  public function size()
-  {
-    return $this->size;
-  }
-
-  /**
-   * Extends the parent's Avro representation with this schema's size.
-   *
-   * @returns mixed the value produced by parent::to_avro() with the
-   *                size stored under AvroSchema::SIZE_ATTR
-   */
-  public function to_avro()
-  {
-    $avro = parent::to_avro();
-    $avro[AvroSchema::SIZE_ATTR] = $this->size;
-    return $avro;
-  }
-}
-
-/**
- * @package Avro
- */
-class AvroRecordSchema extends AvroNamedSchema
-{
- /**
-  * Builds the list of AvroField objects described by $field_data.
-  *
-  * For each entry the name, type and order attributes are read, a default
-  * is recorded only when the default key is actually present (so an
-  * explicit null default is kept distinct from "no default"), and the
-  * field's schema is either looked up by name in $schemata or parsed via
-  * subparse().
-  *
-  * @param mixed $field_data field definitions to iterate over
-  * @param string $default_namespace namespace of enclosing schema
-  * @param AvroNamedSchemata &$schemata
-  * @returns AvroField[]
-  * @throws AvroSchemaParseException if a field name is used more than once
-  */
- public static function parse_fields($field_data, $default_namespace, &$schemata)
- {
-     $fields = array();
-     $field_names = array();
-     foreach ($field_data as $field) {
-         $name = AvroUtil::array_value($field, AvroField::FIELD_NAME_ATTR);
-         $type = AvroUtil::array_value($field, AvroSchema::TYPE_ATTR);
-         $order = AvroUtil::array_value($field, AvroField::ORDER_ATTR);
-
-         // array_key_exists (not isset) so that a null default still counts.
-         $has_default = array_key_exists(AvroField::DEFAULT_ATTR, $field);
-         $default = $has_default ? $field[AvroField::DEFAULT_ATTR] : null;
-
-         // Strict comparison: distinct names must never collide through
-         // PHP's loose-comparison coercion rules.
-         if (in_array($name, $field_names, true)) {
-             throw new AvroSchemaParseException(
-                 sprintf("Field name %s is already in use", $name));
-         }
-
-         // A string type may name a schema that is already known; anything
-         // else, or an unknown name, is parsed as a new schema.
-         $field_schema = null;
-         if (is_string($type)) {
-             $field_schema = $schemata->schema_by_name(
-                 new AvroName($type, null, $default_namespace));
-         }
-         $is_schema_from_schemata = (bool) $field_schema;
-         if (!$is_schema_from_schemata) {
-             $field_schema = self::subparse($type, $default_namespace, $schemata);
-         }
-
-         $fields[] = new AvroField($name, $field_schema, $is_schema_from_schemata,
-                                   $has_default, $default, $order);
-         $field_names[] = $name;
-     }
-
-     return $fields;
- }
-
- /**
- * @var AvroSchema[] array of AvroNamedSchema field definitions of
- * this AvroRecordSchema
- */
- private $fields;
-
- /**
- * @var array map of field names to field objects.
- * @internal Not called directly. Memoization of AvroRecordSchema->fields_hash()
- */
... 3467 lines suppressed ...