You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2010/08/30 18:50:41 UTC

svn commit: r990860 [1/3] - in /avro/trunk: ./ lang/php/ lang/php/examples/ lang/php/lib/ lang/php/lib/avro/ lang/php/test/

Author: cutting
Date: Mon Aug 30 16:50:40 2010
New Revision: 990860

URL: http://svn.apache.org/viewvc?rev=990860&view=rev
Log:
AVRO-627.  Add PHP implementation.  Contributed by Michael Glaesemann.

Added:
    avro/trunk/lang/php/   (with props)
    avro/trunk/lang/php/README.txt
    avro/trunk/lang/php/build.sh   (with props)
    avro/trunk/lang/php/examples/
    avro/trunk/lang/php/examples/write_read.php
    avro/trunk/lang/php/lib/
    avro/trunk/lang/php/lib/avro/
    avro/trunk/lang/php/lib/avro.php
    avro/trunk/lang/php/lib/avro/data_file.php
    avro/trunk/lang/php/lib/avro/datum.php
    avro/trunk/lang/php/lib/avro/debug.php
    avro/trunk/lang/php/lib/avro/gmp.php
    avro/trunk/lang/php/lib/avro/io.php
    avro/trunk/lang/php/lib/avro/schema.php
    avro/trunk/lang/php/lib/avro/util.php
    avro/trunk/lang/php/test/   (with props)
    avro/trunk/lang/php/test/AllTests.php
    avro/trunk/lang/php/test/DataFileTest.php
    avro/trunk/lang/php/test/DatumIOTest.php
    avro/trunk/lang/php/test/FloatIntEncodingTest.php
    avro/trunk/lang/php/test/IODatumReaderTest.php
    avro/trunk/lang/php/test/InterOpTest.php
    avro/trunk/lang/php/test/LongEncodingTest.php
    avro/trunk/lang/php/test/NameTest.php
    avro/trunk/lang/php/test/SchemaTest.php
    avro/trunk/lang/php/test/StringIOTest.php
    avro/trunk/lang/php/test/generate_interop_data.php
    avro/trunk/lang/php/test/test_helper.php
Modified:
    avro/trunk/BUILD.txt
    avro/trunk/CHANGES.txt
    avro/trunk/build.sh

Modified: avro/trunk/BUILD.txt
URL: http://svn.apache.org/viewvc/avro/trunk/BUILD.txt?rev=990860&r1=990859&r2=990860&view=diff
==============================================================================
--- avro/trunk/BUILD.txt (original)
+++ avro/trunk/BUILD.txt Mon Aug 30 16:50:40 2010
@@ -5,6 +5,7 @@ REQUIREMENTS
 The following packages must be installed before Avro can be built:
 
  - Java: JDK 1.6 and 1.5 (for Forrest docs)
+ - PHP: php5, phpunit, php5-gmp
  - Python: 2.5 or greater, python-setuptools for dist target
  - C: gcc, autoconf, automake, libtool, asciidoc, source-highlight
  - C++: g++, flex, bison, libboost-dev

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=990860&r1=990859&r2=990860&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Mon Aug 30 16:50:40 2010
@@ -36,6 +36,8 @@ Avro 1.4.0 (31 August 2010)
 
   NEW FEATURES
 
+    AVRO-627. Add PHP implementation. (Michael Glaesemann)
+
     AVRO-613. Create basic frontend to view trace results.
     (Patrick Wendell via philz)
 

Modified: avro/trunk/build.sh
URL: http://svn.apache.org/viewvc/avro/trunk/build.sh?rev=990860&r1=990859&r2=990860&view=diff
==============================================================================
--- avro/trunk/build.sh (original)
+++ avro/trunk/build.sh Mon Aug 30 16:50:40 2010
@@ -45,6 +45,7 @@ case "$target" in
 	(cd lang/c; ./build.sh test)
 	(cd lang/c++; ./build.sh test)
 	(cd lang/ruby; rake test)
+	(cd lang/php; ./build.sh test)
 
 	# create interop test data
 	(cd lang/java; ant interop-data-generate)
@@ -52,6 +53,7 @@ case "$target" in
 	(cd lang/c; ./build.sh interop-data-generate)
 	#(cd lang/c++; make interop-data-generate)
 	(cd lang/ruby; rake generate_interop)
+	(cd lang/php; ./build.sh interop-data-generate)
 
 	# run interop data tests
 	(cd lang/java; ant interop-data-test)
@@ -59,6 +61,7 @@ case "$target" in
 	(cd lang/c; ./build.sh interop-data-test)
 	#(cd lang/c++; make interop-data-test)
 	(cd lang/ruby; rake interop)
+	(cd lang/php; ./build.sh test-interop)
 
 	# run interop rpc tests
 	/bin/bash share/test/interop/bin/test_rpc_interop.sh
@@ -86,6 +89,8 @@ case "$target" in
 
 	(cd lang/ruby; rake dist)
 
+	(cd lang/php; ./build.sh dist)
+
 	# build docs
 	(cd doc; ant)
 	(cd build; tar czf ../dist/avro-doc-$VERSION.tar.gz avro-doc-$VERSION)
@@ -127,6 +132,7 @@ case "$target" in
 
 	(cd lang/ruby; rake clean)
 
+	(cd lang/php; ./build.sh clean)
 	;;
 
     *)

Propchange: avro/trunk/lang/php/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Aug 30 16:50:40 2010
@@ -0,0 +1 @@
+pkg

Added: avro/trunk/lang/php/README.txt
URL: http://svn.apache.org/viewvc/avro/trunk/lang/php/README.txt?rev=990860&view=auto
==============================================================================
--- avro/trunk/lang/php/README.txt (added)
+++ avro/trunk/lang/php/README.txt Mon Aug 30 16:50:40 2010
@@ -0,0 +1,24 @@
+What the Avro PHP library is
+============================
+
+A library for using [Avro](http://avro.apache.org/) with PHP.
+
+Getting started
+===============
+
+Untar the avro-php distribution and put it in your include path:
+
+    tar xjf avro-php.tar.bz2 # avro-php.tar.bz2 is likely avro-php-1.4.0.tar.bz2
+    cp -r avro-php /path/to/where/you/want/it
+
+Require the avro.php file in your source, and you should be good to go:
+
+    <?php
+    require_once('avro-php/avro.php');
+
+If you're pulling from source, put `lib/` in your include path and require `lib/avro.php`:
+
+    <?php
+    require_once('lib/avro.php');
+
+Take a look in `examples/` for usage.

Added: avro/trunk/lang/php/build.sh
URL: http://svn.apache.org/viewvc/avro/trunk/lang/php/build.sh?rev=990860&view=auto
==============================================================================
--- avro/trunk/lang/php/build.sh (added)
+++ avro/trunk/lang/php/build.sh Mon Aug 30 16:50:40 2010
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+cd "$(dirname "$0")"  # run from this script's directory, even if its path contains spaces
+
+dist_dir="../../dist/php"        # where the finished tarball is copied
+build_dir="pkg"                  # scratch directory for assembling the tarball
+version=$(cat ../../share/VERSION.txt)
+libname="avro-php-$version"
+lib_dir="$build_dir/$libname"
+tarball="$libname.tar.bz2"
+
+test_tmp_dir="test/tmp"          # removed by the clean target
+
+function clean {
+    rm -rf "$test_tmp_dir"
+    rm -rf "$build_dir"
+}
+
+function dist {
+    mkdir -p "$build_dir/$libname" "$lib_dir/examples"
+    cp -pr lib "$lib_dir"
+    cp -pr examples/*.php "$lib_dir/examples"
+    cp README.txt ../../LICENSE.txt ../../NOTICE.txt "$lib_dir"
+    cd "$build_dir"
+    tar -cjf "$tarball" "$libname"
+    mkdir -p "../$dist_dir"
+    cp "$tarball" "../$dist_dir"
+}
+
+case "$1" in
+     interop-data-generate)
+       php test/generate_interop_data.php
+       ;;
+
+     test-interop)
+       phpunit test/InterOpTest.php
+       ;;
+
+     test)
+       phpunit test/AllTests.php
+       ;;
+
+     dist)
+       dist
+       ;;
+
+     clean)
+       clean
+       ;;
+
+     *)  # unknown or missing target: print usage to stderr and fail
+       echo "Usage: $0 {interop-data-generate|test-interop|test|dist|clean}" >&2
+       exit 1
+esac
+
+exit 0

Propchange: avro/trunk/lang/php/build.sh
------------------------------------------------------------------------------
    svn:executable = *

Added: avro/trunk/lang/php/examples/write_read.php
URL: http://svn.apache.org/viewvc/avro/trunk/lang/php/examples/write_read.php?rev=990860&view=auto
==============================================================================
--- avro/trunk/lang/php/examples/write_read.php (added)
+++ avro/trunk/lang/php/examples/write_read.php Mon Aug 30 16:50:40 2010
@@ -0,0 +1,94 @@
+#!/usr/bin/env php
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+require_once('../lib/avro.php');
+
+// Write and read a data file
+
+$writers_schema_json = <<<_JSON
+{"name":"member",
+ "type":"record",
+ "fields":[{"name":"member_id", "type":"int"},
+           {"name":"member_name", "type":"string"}]}
+_JSON;
+
+$jose = array('member_id' => 1392, 'member_name' => 'Jose');
+$maria = array('member_id' => 1642, 'member_name' => 'Maria');
+$data = array($jose, $maria);
+
+$file_name = 'data.avr';
+// Open $file_name for writing, using the given writer's schema
+$data_writer = AvroDataIO::open_file($file_name, 'w', $writers_schema_json);
+
+// Write each datum to the file
+foreach ($data as $datum)
+  $data_writer->append($datum);
+// Tidy up
+$data_writer->close();
+
+// Open $file_name (by default for reading) using the writer's schema
+// included in the file
+$data_reader = AvroDataIO::open_file($file_name);
+echo "from file:\n";
+// Read each datum
+foreach ($data_reader->data() as $datum)
+  echo var_export($datum, true) . "\n";
+$data_reader->close();
+
+// Create a data string
+// Create a string io object.
+$io = new AvroStringIO();
+// Create a datum writer object
+$writers_schema = AvroSchema::parse($writers_schema_json);
+$writer = new AvroIODatumWriter($writers_schema);
+$data_writer = new AvroDataIOWriter($io, $writer, $writers_schema);
+foreach ($data as $datum)
+   $data_writer->append($datum);
+$data_writer->close();
+
+$binary_string = $io->string();
+
+// Load the binary data string
+$read_io = new AvroStringIO($binary_string);
+$data_reader = new AvroDataIOReader($read_io, new AvroIODatumReader());
+echo "from binary string:\n";
+foreach ($data_reader->data() as $datum)
+  echo var_export($datum, true) . "\n";
+
+/** Output
+from file:
+array (
+  'member_id' => 1392,
+  'member_name' => 'Jose',
+)
+array (
+  'member_id' => 1642,
+  'member_name' => 'Maria',
+)
+from binary string:
+array (
+  'member_id' => 1392,
+  'member_name' => 'Jose',
+)
+array (
+  'member_id' => 1642,
+  'member_name' => 'Maria',
+)
+*/

Added: avro/trunk/lang/php/lib/avro.php
URL: http://svn.apache.org/viewvc/avro/trunk/lang/php/lib/avro.php?rev=990860&view=auto
==============================================================================
--- avro/trunk/lang/php/lib/avro.php (added)
+++ avro/trunk/lang/php/lib/avro.php Mon Aug 30 16:50:40 2010
@@ -0,0 +1,194 @@
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Avro library top-level file.
+ *
+ * This file in turn includes all files supporting the
+ * Avro implementation.
+ *
+ * @package Avro
+ */
+
+/**
+ * General Avro exceptions.
+ * @package Avro
+ */
+class AvroException extends Exception {}
+
+/**
+ * Library-level class for PHP Avro port.
+ *
+ * Contains library details such as version number and platform checks.
+ *
+ * This port is an implementation of the
+ * {@link http://avro.apache.org/docs/1.3.3/spec.html Avro 1.3.3 Specification}
+ *
+ * @package Avro
+ *
+ */
+class Avro
+{
+  /**
+   * @var string version number of Avro specification to which
+   *             this implementation complies
+   */
+  const SPEC_VERSION  = '1.3.3';
+
+  /**#@+
+   * Constant to enumerate endianness.
+   * @access private
+   * @var int
+   */
+  const BIG_ENDIAN    = 0x00;
+  const LITTLE_ENDIAN = 0x01;
+  /**#@-*/
+
+  /**
+   * Memoized result of self::set_endianness()
+   * @var int self::BIG_ENDIAN or self::LITTLE_ENDIAN
+   * @see self::set_endianness()
+   */
+  private static $endianness;
+
+  /**#@+
+   * Constant to enumerate biginteger handling mode.
+   * GMP is used, if available, on 32-bit platforms.
+   */
+  const PHP_BIGINTEGER_MODE = 0x00;
+  const GMP_BIGINTEGER_MODE = 0x01;
+  /**#@-*/
+
+  /**
+   * @var int
+   * Mode used to handle bigintegers. After Avro::check_64_bit() has been called
+   * (usually via a call to Avro::check_platform()), set to
+   * self::GMP_BIGINTEGER_MODE on 32-bit platforms that have GMP available,
+   * and to self::PHP_BIGINTEGER_MODE on 64-bit platforms.
+   */
+  private static $biginteger_mode;
+
+  /**
+   * Wrapper method to call each required check.
+   *
+   */
+  public static function check_platform()
+  {
+    self::check_64_bit();
+    self::check_little_endian();
+  }
+
+  /**
+   * Determines if the host platform can encode and decode long integer data
+   * and records the handling mode in self::$biginteger_mode.
+   * @throws AvroException if PHP integers are not 64-bit and GMP is not loaded.
+   */
+  private static function check_64_bit()
+  {
+    if (8 == PHP_INT_SIZE) // native 64-bit integers; GMP is not needed
+    {
+      self::$biginteger_mode = self::PHP_BIGINTEGER_MODE;
+      return;
+    }
+    if (!extension_loaded('gmp'))
+      throw new AvroException('This platform cannot handle 64-bit operations. '
+                              . 'Please install the GMP PHP extension.');
+    self::$biginteger_mode = self::GMP_BIGINTEGER_MODE;
+  }
+
+  /**
+   * @returns boolean true if the PHP GMP extension is used and false otherwise.
+   * @internal Requires Avro::check_64_bit() (exposed via Avro::check_platform())
+   *           to have been called to set Avro::$biginteger_mode.
+   */
+  static function uses_gmp()
+  {
+    return (self::GMP_BIGINTEGER_MODE == self::$biginteger_mode);
+  }
+
+  /**
+   * Determines if the host platform is little endian,
+   * required for processing double and float data.
+   *
+   * @throws AvroException if the platform is not little endian.
+   */
+  private static function check_little_endian()
+  {
+    if (!self::is_little_endian_platform())
+      throw new AvroException('This is not a little-endian platform');
+  }
+
+  /**
+   * Determines the endianness of the host platform and memoizes
+   * the result to Avro::$endianness.
+   *
+   * Based on a similar check performed in http://pear.php.net/package/Math_BinaryUtils
+   *
+   * @throws AvroException if the endianness cannot be determined.
+   */
+  private static function set_endianness()
+  {
+    $packed = pack('d', 1);
+    switch ($packed)
+    {
+      case "\77\360\0\0\0\0\0\0":
+        self::$endianness = self::BIG_ENDIAN;
+        break;
+      case "\0\0\0\0\0\0\360\77":
+        self::$endianness = self::LITTLE_ENDIAN;
+        break;
+      default:
+        throw new AvroException(
+          sprintf('Error determining platform endianness: %s',
+                  AvroDebug::hex_string($packed)));
+    }
+  }
+
+  /**
+   * @returns boolean true if the host platform is big endian
+   *                  and false otherwise.
+   * @uses self::set_endianness()
+   */
+  private static function is_big_endian_platform()
+  {
+    if (is_null(self::$endianness))
+      self::set_endianness();
+
+    return (self::BIG_ENDIAN == self::$endianness);
+  }
+
+  /**
+   * @returns boolean true if the host platform is little endian,
+   *                  and false otherwise.
+   * @uses self::is_big_endian_platform()
+   */
+  private static function is_little_endian_platform()
+  {
+    return !self::is_big_endian_platform();
+  }
+
+}
+
+require_once('avro/util.php');
+require_once('avro/debug.php');
+require_once('avro/schema.php');
+require_once('avro/io.php');
+require_once('avro/gmp.php');
+require_once('avro/datum.php');
+require_once('avro/data_file.php');

Added: avro/trunk/lang/php/lib/avro/data_file.php
URL: http://svn.apache.org/viewvc/avro/trunk/lang/php/lib/avro/data_file.php?rev=990860&view=auto
==============================================================================
--- avro/trunk/lang/php/lib/avro/data_file.php (added)
+++ avro/trunk/lang/php/lib/avro/data_file.php Mon Aug 30 16:50:40 2010
@@ -0,0 +1,535 @@
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Classes handling reading and writing from and to AvroIO objects
+ * @package Avro
+ */
+
+/**
+ * Raised when something unkind happens with respect to AvroDataIO.
+ * @package Avro
+ */
+class AvroDataIOException extends AvroException {}
+
+/**
+ * @package Avro
+ */
+class AvroDataIO
+{
+  /**
+   * @var int used in file header
+   */
+  const VERSION = 1;
+
+  /**
+   * @var int count of bytes in synchronization marker
+   */
+  const SYNC_SIZE = 16;
+
+  /**
+   * @var int   buffered byte count that triggers a block write, arbitrarily set to 1000 * SYNC_SIZE
+   * @todo make this value configurable
+   */
+  const SYNC_INTERVAL = 16000;
+
+  /**
+   * @var string map key for datafile metadata codec value
+   */
+  const METADATA_CODEC_ATTR = 'avro.codec';
+
+  /**
+   * @var string map key for datafile metadata schema value
+   */
+  const METADATA_SCHEMA_ATTR = 'avro.schema';
+  /**
+   * @var string JSON for datafile metadata schema
+   */
+  const METADATA_SCHEMA_JSON = '{"type":"map","values":"bytes"}';
+
+  /**
+   * @var string codec value for NULL codec
+   */
+  const NULL_CODEC = 'null';
+
+  /**
+   * @var string codec value for deflate codec
+   */
+  const DEFLATE_CODEC = 'deflate';
+
+  /**
+   * @var array array of valid codec names
+   * @todo Avro implementations are required to implement deflate codec as well,
+   *       so implement it already!
+   */
+  private static $valid_codecs = array(self::NULL_CODEC);
+
+  /**
+   * @var AvroSchema cached version of metadata schema object
+   */
+  private static $metadata_schema;
+
+  /**
+   * @returns the initial "magic" segment of an Avro container file header.
+   */
+  public static function magic() { return ('Obj' . pack('c', self::VERSION)); }
+
+  /**
+   * @returns int count of bytes in the initial "magic" segment of the
+   *              Avro container file header
+   */
+  public static function magic_size() { return strlen(self::magic()); }
+
+
+  /**
+   * @returns AvroSchema object of Avro container file metadata.
+   */
+  public static function metadata_schema()
+  {
+    if (is_null(self::$metadata_schema))
+      self::$metadata_schema = AvroSchema::parse(self::METADATA_SCHEMA_JSON);
+    return self::$metadata_schema;
+  }
+
+  /**
+   * @param string $file_path file_path of file to open
+   * @param string $mode one of AvroFile::READ_MODE or AvroFile::WRITE_MODE
+   * @param string $schema_json JSON of writer's schema
+   * @returns AvroDataIOWriter|AvroDataIOReader writer in write mode,
+   *          reader in read mode
+   * @throws AvroDataIOException if $schema_json is not provided in write mode
+   *         or if an invalid $mode is given.
+   */
+  public static function open_file($file_path, $mode=AvroFile::READ_MODE,
+                                   $schema_json=null)
+  {
+    $schema = !is_null($schema_json)
+      ? AvroSchema::parse($schema_json) : null;
+
+    $io = false;
+    switch ($mode)
+    {
+      case AvroFile::WRITE_MODE:
+        if (is_null($schema))
+          throw new AvroDataIOException('Writing an Avro file requires a schema.');
+        $file = new AvroFile($file_path, AvroFile::WRITE_MODE);
+        $io = self::open_writer($file, $schema);
+        break;
+      case AvroFile::READ_MODE:
+        $file = new AvroFile($file_path, AvroFile::READ_MODE);
+        $io = self::open_reader($file, $schema);
+        break;
+      default:
+        throw new AvroDataIOException(
+          sprintf("Only modes '%s' and '%s' allowed. You gave '%s'.",
+                  AvroFile::READ_MODE, AvroFile::WRITE_MODE, $mode));
+    }
+    return $io;
+  }
+
+  /**
+   * @returns array array of valid codecs
+   */
+  private static function valid_codecs()
+  {
+    return self::$valid_codecs;
+  }
+
+  /**
+   * @param string $codec
+   * @returns boolean true if $codec is a valid codec value and false otherwise
+   */
+  public static function is_valid_codec($codec)
+  {
+    return in_array($codec, self::valid_codecs());
+  }
+
+  /**
+   * @param AvroIO $io destination to which the container file is written
+   * @param AvroSchema $schema writer's schema for the data
+   * @returns AvroDataIOWriter
+   */
+  protected static function open_writer($io, $schema)
+  {
+    // static: open_file() invokes this as self::open_writer()
+    return new AvroDataIOWriter($io, new AvroIODatumWriter($schema), $schema);
+  }
+
+  /**
+   * @param AvroIO $io source from which the container file is read
+   * @param AvroSchema $schema second argument for AvroIODatumReader; may be null
+   * @returns AvroDataIOReader
+   */
+  protected static function open_reader($io, $schema)
+  {
+    // static: open_file() invokes this as self::open_reader()
+    return new AvroDataIOReader($io, new AvroIODatumReader(null, $schema));
+  }
+
+}
+
+/**
+ *
+ * Reads Avro data from an AvroIO source using an AvroSchema.
+ * @package Avro
+ */
+class AvroDataIOReader
+{
+  /**
+   * @var AvroIO
+   */
+  private $io;
+
+  /**
+   * @var AvroIOBinaryDecoder
+   */
+  private $decoder;
+
+  /**
+   * @var AvroIODatumReader
+   */
+  private $datum_reader;
+
+  /**
+   * @var string sync marker from the header; read by AvroDataIOWriter
+   */
+  public $sync_marker;
+
+  /**
+   * @var array object container metadata; read by AvroDataIOWriter
+   */
+  public $metadata;
+
+  /**
+   * @var int count of items in block
+   */
+  private $block_count;
+
+  /**
+   * @param AvroIO $io source from which to read
+   * @param AvroIODatumReader $datum_reader reader that understands
+   *                                        the data schema
+   * @throws AvroDataIOException if $io is not an instance of AvroIO
+   *         or if the header names an unknown codec
+   * @uses read_header()
+   */
+  public function __construct($io, $datum_reader)
+  {
+    if (!($io instanceof AvroIO))
+      throw new AvroDataIOException('io must be instance of AvroIO');
+
+    $this->io = $io;
+    $this->decoder = new AvroIOBinaryDecoder($this->io);
+    $this->datum_reader = $datum_reader;
+    $this->read_header();
+
+    $codec = AvroUtil::array_value($this->metadata,
+                                   AvroDataIO::METADATA_CODEC_ATTR);
+    if ($codec && !AvroDataIO::is_valid_codec($codec))
+      throw new AvroDataIOException(sprintf('Unknown codec: %s', $codec));
+
+    $this->block_count = 0;
+    // FIXME: Seems unsanitary to set writers_schema here.
+    // Can't constructor take it as an argument?
+    $this->datum_reader->set_writers_schema(
+      AvroSchema::parse($this->metadata[AvroDataIO::METADATA_SCHEMA_ATTR]));
+  }
+
+  /**
+   * Reads header of object container
+   * @throws AvroDataIOException if the file is not an Avro data file.
+   */
+  private function read_header()
+  {
+    $this->seek(0, AvroIO::SEEK_SET);
+
+    $magic = $this->read(AvroDataIO::magic_size());
+
+    if (strlen($magic) < AvroDataIO::magic_size())
+      throw new AvroDataIOException(
+        'Not an Avro data file: shorter than the Avro magic block');
+
+    if (AvroDataIO::magic() != $magic)
+      throw new AvroDataIOException(
+        sprintf('Not an Avro data file: %s does not match %s',
+                $magic, AvroDataIO::magic()));
+
+    $this->metadata = $this->datum_reader->read_data(AvroDataIO::metadata_schema(),
+                                                     AvroDataIO::metadata_schema(),
+                                                     $this->decoder);
+    $this->sync_marker = $this->read(AvroDataIO::SYNC_SIZE);
+  }
+
+  /**
+   * @internal Would be nice to implement data() as an iterator, I think
+   * @returns array of data from object container.
+   */
+  public function data()
+  {
+    $data = array();
+    while (true)
+    {
+      if (0 == $this->block_count)
+      {
+        if ($this->is_eof())
+          break;
+
+        if ($this->skip_sync())
+          if ($this->is_eof())
+            break;
+
+        $this->read_block_header();
+      }
+      $data []= $this->datum_reader->read($this->decoder);
+      $this->block_count -= 1;
+    }
+    return $data;
+  }
+
+  /**
+   * Closes this reader (and its AvroIO object).
+   * @uses AvroIO::close()
+   */
+  public function close() { return $this->io->close(); }
+
+  /**
+   * @uses AvroIO::seek()
+   */
+  private function seek($offset, $whence)
+  {
+    return $this->io->seek($offset, $whence);
+  }
+
+  /**
+   * @uses AvroIO::read()
+   */
+  private function read($len) { return $this->io->read($len); }
+
+  /**
+   * @uses AvroIO::is_eof()
+   */
+  private function is_eof() { return $this->io->is_eof(); }
+
+  private function skip_sync()
+  {
+    $proposed_sync_marker = $this->read(AvroDataIO::SYNC_SIZE);
+    if ($proposed_sync_marker != $this->sync_marker)
+    {
+      $this->seek(-AvroDataIO::SYNC_SIZE, AvroIO::SEEK_CUR);
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Reads the block header (which includes the count of items in the block
+   * and the length in bytes of the block)
+   * @returns int length in bytes of the block.
+   */
+  private function read_block_header()
+  {
+    $this->block_count = $this->decoder->read_long();
+    return $this->decoder->read_long();
+  }
+
+}
+
+/**
+ * Writes Avro data to an AvroIO source using an AvroSchema
+ * @package Avro
+ */
+class AvroDataIOWriter
+{
+  /**
+   * @returns string a new, unique sync marker.
+   */
+  private static function generate_sync_marker()
+  {
+    // From http://php.net/manual/en/function.mt-rand.php comments
+    return pack('S8',
+                mt_rand(0, 0xffff), mt_rand(0, 0xffff),
+                mt_rand(0, 0xffff),
+                mt_rand(0, 0xffff) | 0x4000,
+                mt_rand(0, 0xffff) | 0x8000,
+                mt_rand(0, 0xffff), mt_rand(0, 0xffff), mt_rand(0, 0xffff));
+  }
+
+  /**
+   * @var AvroIO object container where data is written
+   */
+  private $io;
+
+  /**
+   * @var AvroIOBinaryEncoder encoder for object container
+   */
+  private $encoder;
+
+  /**
+   * @var AvroDatumWriter
+   */
+  private $datum_writer;
+
+  /**
+   * @var AvroStringIO buffer for writing
+   */
+  private $buffer;
+
+  /**
+   * @var AvroIOBinaryEncoder encoder for buffer
+   */
+  private $buffer_encoder; // AvroIOBinaryEncoder
+
+  /**
+   * @var int count of items written to block
+   */
+  private $block_count;
+
+  /**
+   * @var array map of object container metadata
+   */
+  private $metadata;
+
+  /**
+   * @param AvroIO $io
+   * @param AvroIODatumWriter $datum_writer
+   * @param AvroSchema $writers_schema
+   */
+  public function __construct($io, $datum_writer, $writers_schema=null)
+  {
+    if (!($io instanceof AvroIO))
+      throw new AvroDataIOException('io must be instance of AvroIO');
+
+    $this->io = $io;
+    $this->encoder = new AvroIOBinaryEncoder($this->io);
+    $this->datum_writer = $datum_writer;
+    $this->buffer = new AvroStringIO();
+    $this->buffer_encoder = new AvroIOBinaryEncoder($this->buffer);
+    $this->block_count = 0;
+    $this->metadata = array();
+
+    if ($writers_schema)
+    {
+      $this->sync_marker = self::generate_sync_marker();
+      $this->metadata[AvroDataIO::METADATA_CODEC_ATTR] = AvroDataIO::NULL_CODEC;
+      $this->metadata[AvroDataIO::METADATA_SCHEMA_ATTR] = strval($writers_schema);
+      $this->write_header();
+    }
+    else
+    {
+      $dfr = new AvroDataIOReader($this->io, new AvroIODatumReader());
+      $this->sync_marker = $dfr->sync_marker;
+      $this->metadata[AvroDataIO::METADATA_CODEC_ATTR] = $dfr->metadata[AvroDataIO::METADATA_CODEC_ATTR];
+
+      $schema_from_file = $dfr->metadata[AvroDataIO::METADATA_SCHEMA_ATTR];
+      $this->metadata[AvroDataIO::METADATA_SCHEMA_ATTR] = $schema_from_file;
+      $this->datum_writer->writers_schema = AvroSchema::parse($schema_from_file);
+      $this->seek(0, SEEK_END);
+    }
+  }
+
+  /**
+   * @param mixed $datum
+   */
+  public function append($datum)
+  {
+    $this->datum_writer->write($datum, $this->buffer_encoder);
+    $this->block_count++;
+
+    if ($this->buffer->length() >= AvroDataIO::SYNC_INTERVAL)
+      $this->write_block();
+  }
+
+  /**
+   * Flushes buffer to AvroIO object container and closes it.
+   * @return mixed value of $io->close()
+   * @see AvroIO::close()
+   */
+  public function close()
+  {
+    $this->flush();
+    return $this->io->close();
+  }
+
+  /**
+   * Flushes buffer to AvroIO object container.
+   * @returns mixed value of $io->flush()
+   * @see AvroIO::flush()
+   */
+  private function flush()
+  {
+    $this->write_block();
+    return $this->io->flush();
+  }
+
+  /**
+   * Writes a block of data to the AvroIO object container.
+   * @throws AvroDataIOException if the codec provided by the encoder
+   *         is not supported
+   * @internal Should the codec check happen in the constructor?
+   *           Why wait until we're writing data?
+   */
+  private function write_block()
+  {
+    if ($this->block_count > 0)
+    {
+      $this->encoder->write_long($this->block_count);
+      $to_write = strval($this->buffer);
+      $this->encoder->write_long(strlen($to_write));
+
+      if (AvroDataIO::is_valid_codec(
+            $this->metadata[AvroDataIO::METADATA_CODEC_ATTR]))
+        $this->write($to_write);
+      else
+        throw new AvroDataIOException(
+          sprintf('codec %s is not supported',
+                  $this->metadata[AvroDataIO::METADATA_CODEC_ATTR]));
+
+      $this->write($this->sync_marker);
+      $this->buffer->truncate();
+      $this->block_count = 0;
+    }
+  }
+
+  /**
+   * Writes the header of the AvroIO object container
+   */
+  private function write_header()
+  {
+    $this->write(AvroDataIO::magic());
+    $this->datum_writer->write_data(AvroDataIO::metadata_schema(),
+                                    $this->metadata, $this->encoder);
+    $this->write($this->sync_marker);
+  }
+
+  /**
+   * @param string $bytes
+   * @uses AvroIO::write()
+   */
+  private function write($bytes) { return $this->io->write($bytes); }
+
+  /**
+   * @param int $offset
+   * @param int $whence
+   * @uses AvroIO::seek()
+   */
+  private function seek($offset, $whence)
+  {
+    return $this->io->seek($offset, $whence);
+  }
+}

Added: avro/trunk/lang/php/lib/avro/datum.php
URL: http://svn.apache.org/viewvc/avro/trunk/lang/php/lib/avro/datum.php?rev=990860&view=auto
==============================================================================
--- avro/trunk/lang/php/lib/avro/datum.php (added)
+++ avro/trunk/lang/php/lib/avro/datum.php Mon Aug 30 16:50:40 2010
@@ -0,0 +1,984 @@
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Classes for reading and writing Avro data to AvroIO objects.
+ *
+ * @package Avro
+ *
+ * @todo Implement JSON encoding, as is required by the Avro spec.
+ */
+
+/**
+ * Exceptions arising from writing or reading Avro data.
+ *
+ * @package Avro
+ */
+class AvroIOTypeException extends AvroException
+{
+  /**
+   * Builds the exception message from the rejected datum and the schema
+   * it was checked against.
+   *
+   * @param AvroSchema $expected_schema schema the datum did not satisfy;
+   *                                    formatted with the %s conversion
+   * @param mixed $datum the rejected value; formatted with var_export()
+   */
+  public function __construct($expected_schema, $datum)
+  {
+    $exported_datum = var_export($datum, true);
+    $message = sprintf('The datum %s is not an example of schema %s',
+                       $exported_datum, $expected_schema);
+    parent::__construct($message);
+  }
+}
+
+/**
+ * Exceptions arising from incompatibility between
+ * reader and writer schemas.
+ *
+ * @package Avro
+ */
+class AvroIOSchemaMatchException extends AvroException
+{
+  /**
+   * Builds the exception message from the two schemas that could not be
+   * reconciled. Both are formatted with the %s conversion.
+   *
+   * @param AvroSchema $writers_schema
+   * @param AvroSchema $readers_schema
+   */
+  public function __construct($writers_schema, $readers_schema)
+  {
+    $template = "Writer's schema %s and Reader's schema %s do not match.";
+    $message = sprintf($template, $writers_schema, $readers_schema);
+    parent::__construct($message);
+  }
+}
+
+/**
+ * Handles schema-specific writing of data to the encoder.
+ *
+ * Ensures that each datum written is consistent with the writer's schema.
+ *
+ * @package Avro
+ */
+class AvroIODatumWriter
+{
+  /**
+   * Schema used by this instance to write Avro data.
+   * @var AvroSchema
+   */
+  private $writers_schema;
+
+  /**
+   * @param AvroSchema $writers_schema schema used by {@link write()}.
+   *                                   {@link write_data()} takes its
+   *                                   schema as an argument instead.
+   */
+  function __construct($writers_schema=null)
+  {
+    $this->writers_schema = $writers_schema;
+  }
+
+  /**
+   * Validates $datum against $writers_schema, then writes it to $encoder,
+   * dispatching on the schema type.
+   *
+   * @param AvroSchema $writers_schema
+   * @param $datum
+   * @param AvroIOBinaryEncoder $encoder
+   * @returns mixed
+   *
+   * @throws AvroIOTypeException if $datum is invalid for $writers_schema
+   * @throws AvroException if the type of $writers_schema is not handled
+   */
+  function write_data($writers_schema, $datum, $encoder)
+  {
+    if (!AvroSchema::is_valid_datum($writers_schema, $datum))
+      throw new AvroIOTypeException($writers_schema, $datum);
+
+    switch ($writers_schema->type())
+    {
+      case AvroSchema::NULL_TYPE:
+        return $encoder->write_null($datum);
+      case AvroSchema::BOOLEAN_TYPE:
+        return $encoder->write_boolean($datum);
+      case AvroSchema::INT_TYPE:
+        return $encoder->write_int($datum);
+      case AvroSchema::LONG_TYPE:
+        return $encoder->write_long($datum);
+      case AvroSchema::FLOAT_TYPE:
+        return $encoder->write_float($datum);
+      case AvroSchema::DOUBLE_TYPE:
+        return $encoder->write_double($datum);
+      case AvroSchema::STRING_TYPE:
+        return $encoder->write_string($datum);
+      case AvroSchema::BYTES_TYPE:
+        return $encoder->write_bytes($datum);
+      case AvroSchema::ARRAY_SCHEMA:
+        return $this->write_array($writers_schema, $datum, $encoder);
+      case AvroSchema::MAP_SCHEMA:
+        return $this->write_map($writers_schema, $datum, $encoder);
+      case AvroSchema::FIXED_SCHEMA:
+        return $this->write_fixed($writers_schema, $datum, $encoder);
+      case AvroSchema::ENUM_SCHEMA:
+        return $this->write_enum($writers_schema, $datum, $encoder);
+      case AvroSchema::RECORD_SCHEMA:
+      case AvroSchema::ERROR_SCHEMA:
+      case AvroSchema::REQUEST_SCHEMA:
+        return $this->write_record($writers_schema, $datum, $encoder);
+      case AvroSchema::UNION_SCHEMA:
+        return $this->write_union($writers_schema, $datum, $encoder);
+      default:
+        // Use the type() accessor, as the switch above does.
+        throw new AvroException(sprintf('Unknown type: %s',
+                                        $writers_schema->type()));
+    }
+  }
+
+  /**
+   * Writes $datum to $encoder using the schema given to the constructor.
+   *
+   * @param $datum
+   * @param AvroIOBinaryEncoder $encoder
+   */
+  function write($datum, $encoder)
+  {
+    $this->write_data($this->writers_schema, $datum, $encoder);
+  }
+
+  /**#@+
+   * @param AvroSchema $writers_schema
+   * @param null|boolean|int|float|string|array $datum item to be written
+   * @param AvroIOBinaryEncoder $encoder
+   */
+  private function write_array($writers_schema, $datum, $encoder)
+  {
+    // A non-empty array is written as one block: the item count followed
+    // by the items. A count of zero always terminates the array.
+    $datum_count = count($datum);
+    if (0 < $datum_count)
+    {
+      $encoder->write_long($datum_count);
+      $items = $writers_schema->items();
+      foreach ($datum as $item)
+        $this->write_data($items, $item, $encoder);
+    }
+    return $encoder->write_long(0);
+  }
+
+  private function write_map($writers_schema, $datum, $encoder)
+  {
+    // Same block structure as write_array(), with each entry written as
+    // a string key followed by its value.
+    $datum_count = count($datum);
+    if ($datum_count > 0)
+    {
+      $encoder->write_long($datum_count);
+      $values = $writers_schema->values();
+      foreach ($datum as $k => $v)
+      {
+        $encoder->write_string($k);
+        $this->write_data($values, $v, $encoder);
+      }
+    }
+    return $encoder->write_long(0);
+  }
+
+  private function write_union($writers_schema, $datum, $encoder)
+  {
+    // Select the first branch of the union for which $datum is valid.
+    $datum_schema_index = -1;
+    $datum_schema = null;
+    foreach ($writers_schema->schemas() as $index => $schema)
+      if (AvroSchema::is_valid_datum($schema, $datum))
+      {
+        $datum_schema_index = $index;
+        $datum_schema = $schema;
+        break;
+      }
+
+    if (is_null($datum_schema))
+      throw new AvroIOTypeException($writers_schema, $datum);
+
+    // The branch index is written first, then the datum itself.
+    $encoder->write_long($datum_schema_index);
+    $this->write_data($datum_schema, $datum, $encoder);
+  }
+
+  private function write_enum($writers_schema, $datum, $encoder)
+  {
+    // An enum symbol is written as its index within the schema.
+    $datum_index = $writers_schema->symbol_index($datum);
+    return $encoder->write_int($datum_index);
+  }
+
+  private function write_fixed($writers_schema, $datum, $encoder)
+  {
+    /**
+     * NOTE Unused $writers_schema parameter included for consistency
+     * with other write_* methods.
+     */
+    return $encoder->write($datum);
+  }
+
+  private function write_record($writers_schema, $datum, $encoder)
+  {
+    // Fields are written in schema order, looked up in $datum by name.
+    foreach ($writers_schema->fields() as $field)
+      $this->write_data($field->type(), $datum[$field->name()], $encoder);
+  }
+
+  /**#@-*/
+}
+
+/**
+ * Encodes and writes Avro data to an AvroIO object using
+ * Avro binary encoding.
+ *
+ * @package Avro
+ */
+class AvroIOBinaryEncoder
+{
+  /**
+   * Performs encoding of the given float value to a binary string
+   *
+   * XXX: This is <b>not</b> endian-aware! The {@link Avro::check_platform()}
+   * called in {@link AvroIOBinaryEncoder::__construct()} should ensure the
+   * library is only used on little-endian platforms, which ensure the little-endian
+   * encoding required by the Avro spec.
+   *
+   * @param float $float
+   * @returns string bytes
+   * @see Avro::check_platform()
+   */
+  static function float_to_int_bits($float)
+  {
+    return pack('f', (float) $float);
+  }
+
+  /**
+   * Performs encoding of the given double value to a binary string
+   *
+   * XXX: This is <b>not</b> endian-aware! See comments in
+   * {@link AvroIOBinaryEncoder::float_to_int_bits()} for details.
+   *
+   * @param double $double
+   * @returns string bytes
+   */
+  static function double_to_long_bits($double)
+  {
+    return pack('d', (double) $double);
+  }
+
+  /**
+   * Encodes $n as a zig-zag, variable-length byte string.
+   *
+   * @param int|string $n
+   * @returns string long $n encoded as bytes
+   * @internal This relies on 64-bit PHP.
+   */
+  static public function encode_long($n)
+  {
+    $n = (int) $n;
+    // Zig-zag: fold the sign into the lowest bit.
+    $n = ($n << 1) ^ ($n >> 63);
+    // PHP's >> copies the sign bit into the vacated positions. Clearing
+    // the seven bits shifted in at the top turns it into an unsigned
+    // shift; without this a zig-zag value with its high bit set never
+    // reaches zero and the loop below does not terminate.
+    $unsigned_shift_mask = ~(-1 << (PHP_INT_SIZE * 8 - 7));
+    $str = '';
+    while (0 != ($n & ~0x7F))
+    {
+      // Low seven bits, with the continuation bit set.
+      $str .= chr(($n & 0x7F) | 0x80);
+      $n = ($n >> 7) & $unsigned_shift_mask;
+    }
+    $str .= chr($n);
+    return $str;
+  }
+
+  /**
+   * @var AvroIO
+   */
+  private $io;
+
+  /**
+   * @param AvroIO $io object to which data is to be written.
+   *
+   */
+  function __construct($io)
+  {
+    Avro::check_platform();
+    $this->io = $io;
+  }
+
+  /**
+   * Writes nothing: this method has no output.
+   * @param null $datum actual value is ignored
+   */
+  function write_null($datum) { return null; }
+
+  /**
+   * Writes a single byte: 1 for a truthy $datum, 0 otherwise.
+   * @param boolean $datum
+   */
+  function write_boolean($datum)
+  {
+    $byte = $datum ? chr(1) : chr(0);
+    $this->write($byte);
+  }
+
+  /**
+   * Ints share the encoding used for longs.
+   * @param int $datum
+   * @uses self::write_long()
+   */
+  function write_int($datum) { $this->write_long($datum); }
+
+  /**
+   * Writes $n using the GMP-based encoder when {@link Avro::uses_gmp()}
+   * says so, and the native one otherwise.
+   * @param int $n
+   */
+  function write_long($n)
+  {
+    if (Avro::uses_gmp())
+      $this->write(AvroGMP::encode_long($n));
+    else
+      $this->write(self::encode_long($n));
+  }
+
+  /**
+   * @param float $datum
+   * @uses self::float_to_int_bits()
+   */
+  public function write_float($datum)
+  {
+    $this->write(self::float_to_int_bits($datum));
+  }
+
+  /**
+   * @param float $datum
+   * @uses self::double_to_long_bits()
+   */
+  public function write_double($datum)
+  {
+    $this->write(self::double_to_long_bits($datum));
+  }
+
+  /**
+   * @param string $str
+   * @uses self::write_bytes()
+   */
+  function write_string($str) { $this->write_bytes($str); }
+
+  /**
+   * Writes the byte length of $bytes as a long, followed by $bytes.
+   * @param string $bytes
+   */
+  function write_bytes($bytes)
+  {
+    $this->write_long(strlen($bytes));
+    $this->write($bytes);
+  }
+
+  /**
+   * Writes $datum to the underlying AvroIO object as is.
+   * @param string $datum
+   */
+  function write($datum) { $this->io->write($datum); }
+}
+
+/**
+ * Handles schema-specific reading of data from the decoder.
+ *
+ * Also handles schema resolution between the reader and writer
+ * schemas (if a writer's schema is provided).
+ *
+ * @package Avro
+ */
+class AvroIODatumReader
+{
+  /**
+   * Checks whether data written with $writers_schema can be read with
+   * $readers_schema. Only the top level of the two schemas is compared;
+   * any union is accepted here and resolved while reading.
+   *
+   * @param AvroSchema $writers_schema
+   * @param AvroSchema $readers_schema
+   * @returns boolean true if the schemas are consistent with
+   *                  each other and false otherwise.
+   */
+  static function schemas_match($writers_schema, $readers_schema)
+  {
+    $writers_schema_type = $writers_schema->type();
+    $readers_schema_type = $readers_schema->type();
+
+    if (AvroSchema::UNION_SCHEMA == $writers_schema_type
+        || AvroSchema::UNION_SCHEMA == $readers_schema_type)
+      return true;
+
+    if ($writers_schema_type == $readers_schema_type)
+    {
+      if (AvroSchema::is_primitive_type($writers_schema_type))
+        return true;
+
+      switch ($readers_schema_type)
+      {
+        case AvroSchema::MAP_SCHEMA:
+          return self::attributes_match($writers_schema->values(),
+                                        $readers_schema->values(),
+                                        array(AvroSchema::TYPE_ATTR));
+        case AvroSchema::ARRAY_SCHEMA:
+          return self::attributes_match($writers_schema->items(),
+                                        $readers_schema->items(),
+                                        array(AvroSchema::TYPE_ATTR));
+        case AvroSchema::ENUM_SCHEMA:
+          return self::attributes_match($writers_schema, $readers_schema,
+                                        array(AvroSchema::FULLNAME_ATTR));
+        case AvroSchema::FIXED_SCHEMA:
+          return self::attributes_match($writers_schema, $readers_schema,
+                                        array(AvroSchema::FULLNAME_ATTR,
+                                              AvroSchema::SIZE_ATTR));
+        case AvroSchema::RECORD_SCHEMA:
+        case AvroSchema::ERROR_SCHEMA:
+          return self::attributes_match($writers_schema, $readers_schema,
+                                        array(AvroSchema::FULLNAME_ATTR));
+        case AvroSchema::REQUEST_SCHEMA:
+          // XXX: This seems wrong
+          return true;
+      }
+
+      // Same type, but not one that is known how to compare.
+      return false;
+    }
+
+    // The types differ, so the only remaining way to match is a numeric
+    // promotion of the writer's type to the reader's type. These checks
+    // must sit outside the equal-types branch above: inside it they can
+    // never succeed.
+    if (AvroSchema::INT_TYPE == $writers_schema_type
+        && in_array($readers_schema_type, array(AvroSchema::LONG_TYPE,
+                                                AvroSchema::FLOAT_TYPE,
+                                                AvroSchema::DOUBLE_TYPE)))
+      return true;
+
+    if (AvroSchema::LONG_TYPE == $writers_schema_type
+        && in_array($readers_schema_type, array(AvroSchema::FLOAT_TYPE,
+                                                AvroSchema::DOUBLE_TYPE)))
+      return true;
+
+    if (AvroSchema::FLOAT_TYPE == $writers_schema_type
+        && AvroSchema::DOUBLE_TYPE == $readers_schema_type)
+      return true;
+
+    return false;
+  }
+
+  /**
+   * Checks equivalence of the given attributes of the two given schemas.
+   *
+   * @param AvroSchema $schema_one
+   * @param AvroSchema $schema_two
+   * @param string[] $attribute_names array of string attribute names to compare
+   *
+   * @returns boolean true if the attributes match and false otherwise.
+   */
+  static function attributes_match($schema_one, $schema_two, $attribute_names)
+  {
+    foreach ($attribute_names as $attribute_name)
+      if ($schema_one->attribute($attribute_name)
+          != $schema_two->attribute($attribute_name))
+        return false;
+    return true;
+  }
+
+  /**
+   * @var AvroSchema
+   */
+  private $writers_schema;
+
+  /**
+   * @var AvroSchema
+   */
+  private $readers_schema;
+
+  /**
+   * @param AvroSchema $writers_schema
+   * @param AvroSchema $readers_schema
+   */
+  function __construct($writers_schema=null, $readers_schema=null)
+  {
+    $this->writers_schema = $writers_schema;
+    $this->readers_schema = $readers_schema;
+  }
+
+  /**
+   * Sets the writer's schema used by {@link read()}.
+   *
+   * @param AvroSchema $readers_schema the new <b>writer's</b> schema; the
+   *        parameter name is misleading and is kept only so that the
+   *        method signature stays unchanged.
+   */
+  public function set_writers_schema($readers_schema)
+  {
+    $this->writers_schema = $readers_schema;
+  }
+
+  /**
+   * Reads one datum from $decoder. If no reader's schema has been set,
+   * the writer's schema is used for it from then on.
+   *
+   * @param AvroIOBinaryDecoder $decoder
+   * @returns mixed
+   */
+  public function read($decoder)
+  {
+    if (is_null($this->readers_schema))
+      $this->readers_schema = $this->writers_schema;
+    return $this->read_data($this->writers_schema, $this->readers_schema,
+                            $decoder);
+  }
+
+  /**#@+
+   * @param AvroSchema $writers_schema
+   * @param AvroSchema $readers_schema
+   * @param AvroIOBinaryDecoder $decoder
+   */
+  /**
+   * @returns mixed
+   * @throws AvroIOSchemaMatchException if the schemas cannot be reconciled
+   * @throws AvroException if the writer's schema type is not handled
+   */
+  public function read_data($writers_schema, $readers_schema, $decoder)
+  {
+    if (!self::schemas_match($writers_schema, $readers_schema))
+      throw new AvroIOSchemaMatchException($writers_schema, $readers_schema);
+
+    // Schema resolution: reader's schema is a union, writer's schema is not
+    if (AvroSchema::UNION_SCHEMA == $readers_schema->type()
+        && AvroSchema::UNION_SCHEMA != $writers_schema->type())
+    {
+      foreach ($readers_schema->schemas() as $schema)
+        if (self::schemas_match($writers_schema, $schema))
+          return $this->read_data($writers_schema, $schema, $decoder);
+      throw new AvroIOSchemaMatchException($writers_schema, $readers_schema);
+    }
+
+    switch ($writers_schema->type())
+    {
+      case AvroSchema::NULL_TYPE:
+        return $decoder->read_null();
+      case AvroSchema::BOOLEAN_TYPE:
+        return $decoder->read_boolean();
+      case AvroSchema::INT_TYPE:
+        return $decoder->read_int();
+      case AvroSchema::LONG_TYPE:
+        return $decoder->read_long();
+      case AvroSchema::FLOAT_TYPE:
+        return $decoder->read_float();
+      case AvroSchema::DOUBLE_TYPE:
+        return $decoder->read_double();
+      case AvroSchema::STRING_TYPE:
+        return $decoder->read_string();
+      case AvroSchema::BYTES_TYPE:
+        return $decoder->read_bytes();
+      case AvroSchema::ARRAY_SCHEMA:
+        return $this->read_array($writers_schema, $readers_schema, $decoder);
+      case AvroSchema::MAP_SCHEMA:
+        return $this->read_map($writers_schema, $readers_schema, $decoder);
+      case AvroSchema::UNION_SCHEMA:
+        return $this->read_union($writers_schema, $readers_schema, $decoder);
+      case AvroSchema::ENUM_SCHEMA:
+        return $this->read_enum($writers_schema, $readers_schema, $decoder);
+      case AvroSchema::FIXED_SCHEMA:
+        return $this->read_fixed($writers_schema, $readers_schema, $decoder);
+      case AvroSchema::RECORD_SCHEMA:
+      case AvroSchema::ERROR_SCHEMA:
+      case AvroSchema::REQUEST_SCHEMA:
+        return $this->read_record($writers_schema, $readers_schema, $decoder);
+      default:
+        throw new AvroException(sprintf("Cannot read unknown schema type: %s",
+                                        $writers_schema->type()));
+    }
+  }
+
+  /**
+   * @returns array
+   */
+  public function read_array($writers_schema, $readers_schema, $decoder)
+  {
+    $items = array();
+    $block_count = $decoder->read_long();
+    while (0 != $block_count)
+    {
+      if ($block_count < 0)
+      {
+        $block_count = -$block_count;
+        // A negative count is followed by the block size, which is
+        // read here only to move past it.
+        $decoder->read_long();
+      }
+      for ($i = 0; $i < $block_count; $i++)
+        $items []= $this->read_data($writers_schema->items(),
+                                    $readers_schema->items(),
+                                    $decoder);
+      $block_count = $decoder->read_long();
+    }
+    return $items;
+  }
+
+  /**
+   * @returns array
+   */
+  public function read_map($writers_schema, $readers_schema, $decoder)
+  {
+    $items = array();
+    $pair_count = $decoder->read_long();
+    while (0 != $pair_count)
+    {
+      if ($pair_count < 0)
+      {
+        $pair_count = -$pair_count;
+        // A negative count is followed by the block size, which is
+        // read here only to move past it.
+        $decoder->read_long();
+      }
+
+      for ($i = 0; $i < $pair_count; $i++)
+      {
+        $key = $decoder->read_string();
+        $items[$key] = $this->read_data($writers_schema->values(),
+                                        $readers_schema->values(),
+                                        $decoder);
+      }
+      $pair_count = $decoder->read_long();
+    }
+    return $items;
+  }
+
+  /**
+   * @returns mixed
+   */
+  public function read_union($writers_schema, $readers_schema, $decoder)
+  {
+    // The branch index comes first and selects the writer's schema
+    // that the following datum was written with.
+    $schema_index = $decoder->read_long();
+    $selected_writers_schema = $writers_schema->schema_by_index($schema_index);
+    return $this->read_data($selected_writers_schema, $readers_schema, $decoder);
+  }
+
+  /**
+   * @returns string
+   */
+  public function read_enum($writers_schema, $readers_schema, $decoder)
+  {
+    $symbol_index = $decoder->read_int();
+    $symbol = $writers_schema->symbol_by_index($symbol_index);
+    if (!$readers_schema->has_symbol($symbol))
+      null; // FIXME: unset wrt schema resolution
+    return $symbol;
+  }
+
+  /**
+   * @returns string
+   */
+  public function read_fixed($writers_schema, $readers_schema, $decoder)
+  {
+    return $decoder->read($writers_schema->size());
+  }
+
+  /**
+   * @returns array
+   */
+  public function read_record($writers_schema, $readers_schema, $decoder)
+  {
+    $readers_fields = $readers_schema->fields_hash();
+    $record = array();
+    foreach ($writers_schema->fields() as $writers_field)
+    {
+      $type = $writers_field->type();
+      if (isset($readers_fields[$writers_field->name()]))
+        $record[$writers_field->name()]
+          = $this->read_data($type,
+                             $readers_fields[$writers_field->name()]->type(),
+                             $decoder);
+      else
+        // The reader does not know this field: move past its data.
+        $this->skip_data($type, $decoder);
+    }
+    // Fill in default values
+    if (count($readers_fields) > count($record))
+    {
+      $writers_fields = $writers_schema->fields_hash();
+      foreach ($readers_fields as $field_name => $field)
+      {
+        if (!isset($writers_fields[$field_name]))
+        {
+          if ($field->has_default_value())
+            $record[$field->name()]
+              = $this->read_default_value($field->type(),
+                                          $field->default_value());
+          else
+            null; // FIXME: unset
+        }
+      }
+    }
+
+    return $record;
+  }
+  /**#@-*/
+
+  /**
+   * Converts a default value taken from a schema into the form a datum
+   * of the given schema has when it is read.
+   *
+   * @param AvroSchema $field_schema
+   * @param null|boolean|int|float|string|array $default_value
+   * @returns null|boolean|int|float|string|array
+   *
+   * @throws AvroException if $field_schema type is unknown.
+   */
+  public function read_default_value($field_schema, $default_value)
+  {
+    switch($field_schema->type())
+    {
+      case AvroSchema::NULL_TYPE:
+        return null;
+      case AvroSchema::BOOLEAN_TYPE:
+        return $default_value;
+      case AvroSchema::INT_TYPE:
+      case AvroSchema::LONG_TYPE:
+        return (int) $default_value;
+      case AvroSchema::FLOAT_TYPE:
+      case AvroSchema::DOUBLE_TYPE:
+        return (float) $default_value;
+      case AvroSchema::STRING_TYPE:
+      case AvroSchema::BYTES_TYPE:
+        return $default_value;
+      case AvroSchema::ARRAY_SCHEMA:
+        $array = array();
+        foreach ($default_value as $json_val)
+        {
+          $val = $this->read_default_value($field_schema->items(), $json_val);
+          $array []= $val;
+        }
+        return $array;
+      case AvroSchema::MAP_SCHEMA:
+        $map = array();
+        foreach ($default_value as $key => $json_val)
+          $map[$key] = $this->read_default_value($field_schema->values(),
+                                                 $json_val);
+        return $map;
+      case AvroSchema::UNION_SCHEMA:
+        // The default is interpreted using the first branch of the union.
+        return $this->read_default_value($field_schema->schema_by_index(0),
+                                         $default_value);
+      case AvroSchema::ENUM_SCHEMA:
+      case AvroSchema::FIXED_SCHEMA:
+        return $default_value;
+      case AvroSchema::RECORD_SCHEMA:
+        $record = array();
+        foreach ($field_schema->fields() as $field)
+        {
+          $field_name = $field->name();
+          // Test for presence of the key rather than truthiness of the
+          // value, so that defaults such as 0, false and '' are kept
+          // instead of being replaced by the field's own default.
+          if (is_array($default_value)
+              && array_key_exists($field_name, $default_value))
+            $json_val = $default_value[$field_name];
+          else
+            $json_val = $field->default_value();
+
+          $record[$field_name] = $this->read_default_value($field->type(),
+                                                           $json_val);
+        }
+        return $record;
+      default:
+        throw new AvroException(sprintf('Unknown type: %s',
+                                        $field_schema->type()));
+    }
+  }
+
+  /**
+   * Moves $decoder past one datum written with $writers_schema.
+   *
+   * Only public methods of the decoder are used; the traversal of
+   * complex types is done by the private skip_* helpers below.
+   *
+   * @param AvroSchema $writers_schema
+   * @param AvroIOBinaryDecoder $decoder
+   *
+   * @throws AvroException if the type of $writers_schema is unknown.
+   */
+  private function skip_data($writers_schema, $decoder)
+  {
+    switch ($writers_schema->type())
+    {
+      case AvroSchema::NULL_TYPE:
+        return $decoder->skip_null();
+      case AvroSchema::BOOLEAN_TYPE:
+        return $decoder->skip_boolean();
+      case AvroSchema::INT_TYPE:
+      case AvroSchema::LONG_TYPE:
+      case AvroSchema::ENUM_SCHEMA:
+        // Variable-length integers have no fixed size to seek over:
+        // decode one and discard the value.
+        $decoder->read_long();
+        return null;
+      case AvroSchema::FLOAT_TYPE:
+        return $decoder->skip_float();
+      case AvroSchema::DOUBLE_TYPE:
+        return $decoder->skip_double();
+      case AvroSchema::STRING_TYPE:
+        return $decoder->skip_string();
+      case AvroSchema::BYTES_TYPE:
+        return $decoder->skip_bytes();
+      case AvroSchema::ARRAY_SCHEMA:
+        return $this->skip_array($writers_schema, $decoder);
+      case AvroSchema::MAP_SCHEMA:
+        return $this->skip_map($writers_schema, $decoder);
+      case AvroSchema::UNION_SCHEMA:
+        return $this->skip_union($writers_schema, $decoder);
+      case AvroSchema::FIXED_SCHEMA:
+        return $decoder->skip($writers_schema->size());
+      case AvroSchema::RECORD_SCHEMA:
+      case AvroSchema::ERROR_SCHEMA:
+      case AvroSchema::REQUEST_SCHEMA:
+        return $this->skip_record($writers_schema, $decoder);
+      default:
+        throw new AvroException(sprintf('Unknown schema type: %s',
+                                        $writers_schema->type()));
+    }
+  }
+
+  /**
+   * Moves $decoder past an array, block by block.
+   *
+   * @param AvroSchema $writers_schema
+   * @param AvroIOBinaryDecoder $decoder
+   */
+  private function skip_array($writers_schema, $decoder)
+  {
+    $block_count = $decoder->read_long();
+    while (0 != $block_count)
+    {
+      if ($block_count < 0)
+        // A negative count is followed by the block's size in bytes,
+        // which allows the whole block to be skipped at once.
+        $decoder->skip($decoder->read_long());
+      else
+        for ($i = 0; $i < $block_count; $i++)
+          $this->skip_data($writers_schema->items(), $decoder);
+      $block_count = $decoder->read_long();
+    }
+  }
+
+  /**
+   * Moves $decoder past a map, block by block.
+   *
+   * @param AvroSchema $writers_schema
+   * @param AvroIOBinaryDecoder $decoder
+   */
+  private function skip_map($writers_schema, $decoder)
+  {
+    $pair_count = $decoder->read_long();
+    while (0 != $pair_count)
+    {
+      if ($pair_count < 0)
+        // As for arrays: the byte size follows a negative count.
+        $decoder->skip($decoder->read_long());
+      else
+        for ($i = 0; $i < $pair_count; $i++)
+        {
+          $decoder->skip_string();
+          $this->skip_data($writers_schema->values(), $decoder);
+        }
+      $pair_count = $decoder->read_long();
+    }
+  }
+
+  /**
+   * Moves $decoder past a union: the branch index, then the datum of
+   * the branch it selects.
+   *
+   * @param AvroSchema $writers_schema
+   * @param AvroIOBinaryDecoder $decoder
+   */
+  private function skip_union($writers_schema, $decoder)
+  {
+    $schema_index = $decoder->read_long();
+    $this->skip_data($writers_schema->schema_by_index($schema_index),
+                     $decoder);
+  }
+
+  /**
+   * Moves $decoder past a record by skipping each field in schema order.
+   *
+   * @param AvroSchema $writers_schema
+   * @param AvroIOBinaryDecoder $decoder
+   */
+  private function skip_record($writers_schema, $decoder)
+  {
+    foreach ($writers_schema->fields() as $field)
+      $this->skip_data($field->type(), $decoder);
+  }
+}
+
+/**
+ * Decodes and reads Avro data from an AvroIO object encoded using
+ * Avro binary encoding.
+ *
+ * @package Avro
+ */
+class AvroIOBinaryDecoder
+{
+
+  /**
+   * Decodes a zig-zag, variable-length encoded long.
+   *
+   * @param int[] array of byte ascii values
+   * @returns long decoded value
+   * @internal Requires 64-bit platform
+   */
+  public static function decode_long_from_array($bytes)
+  {
+    // Each byte carries seven bits of the value, lowest bits first;
+    // the high bit of a byte says whether another byte follows.
+    $b = array_shift($bytes);
+    $n = $b & 0x7f;
+    $shift = 7;
+    while (0 != ($b & 0x80))
+    {
+      $b = array_shift($bytes);
+      $n |= (($b & 0x7f) << $shift);
+      $shift += 7;
+    }
+    // Undo the zig-zag encoding. PHP's >> copies the sign bit, so the
+    // top bit is masked off to obtain the unsigned shift required;
+    // otherwise values whose encoded form has the high bit set decode
+    // to the wrong number.
+    return ((($n >> 1) & PHP_INT_MAX) ^ -($n & 1));
+  }
+
+  /**
+   * Performs decoding of the binary string to a float value.
+   *
+   * XXX: This is <b>not</b> endian-aware! See comments in
+   * {@link AvroIOBinaryEncoder::float_to_int_bits()} for details.
+   *
+   * @param string $bits
+   * @returns float
+   */
+  static public function int_bits_to_float($bits)
+  {
+    $float = unpack('f', $bits);
+    return (float) $float[1];
+  }
+
+  /**
+   * Performs decoding of the binary string to a double value.
+   *
+   * XXX: This is <b>not</b> endian-aware! See comments in
+   * {@link AvroIOBinaryEncoder::float_to_int_bits()} for details.
+   *
+   * @param string $bits
+   * @returns float
+   */
+  static public function long_bits_to_double($bits)
+  {
+    $double = unpack('d', $bits);
+    return (double) $double[1];
+  }
+
+  /**
+   * @var AvroIO
+   */
+  private $io;
+
+  /**
+   * @param AvroIO $io object from which to read.
+   */
+  public function __construct($io)
+  {
+    Avro::check_platform();
+    $this->io = $io;
+  }
+
+  /**
+   * @returns string the next byte from $this->io.
+   * @throws AvroException if the next byte cannot be read.
+   */
+  private function next_byte() { return $this->read(1); }
+
+  /**
+   * Reads nothing: this method consumes no input.
+   * @returns null
+   */
+  public function read_null() { return null; }
+
+  /**
+   * @returns boolean true if the next byte is 1 and false otherwise
+   */
+  public function read_boolean()
+  {
+    return (boolean) (1 == ord($this->next_byte()));
+  }
+
+  /**
+   * Ints share the encoding used for longs.
+   * @returns int
+   */
+  public function read_int() { return (int) $this->read_long(); }
+
+  /**
+   * Collects the bytes of one variable-length long and decodes them with
+   * the GMP-based decoder when {@link Avro::uses_gmp()} says so, and the
+   * native one otherwise.
+   *
+   * @returns long
+   */
+  public function read_long()
+  {
+    $byte = ord($this->next_byte());
+    $bytes = array($byte);
+    while (0 != ($byte & 0x80))
+    {
+      $byte = ord($this->next_byte());
+      $bytes []= $byte;
+    }
+
+    if (Avro::uses_gmp())
+      return AvroGMP::decode_long_from_array($bytes);
+
+    return self::decode_long_from_array($bytes);
+  }
+
+  /**
+   * @returns float
+   */
+  public function read_float()
+  {
+    return self::int_bits_to_float($this->read(4));
+  }
+
+  /**
+   * @returns double
+   */
+  public function read_double()
+  {
+    return self::long_bits_to_double($this->read(8));
+  }
+
+  /**
+   * A string is encoded as a long followed by that many bytes
+   * of UTF-8 encoded character data.
+   * @returns string
+   */
+  public function read_string() { return $this->read_bytes(); }
+
+  /**
+   * Reads a long, then that many bytes.
+   * @returns string
+   */
+  public function read_bytes() { return $this->read($this->read_long()); }
+
+  /**
+   * @param int $len count of bytes to read
+   * @returns string
+   */
+  public function read($len) { return $this->io->read($len); }
+
+  public function skip_null() { return null; }
+
+  public function skip_boolean() { return $this->skip(1); }
+
+  public function skip_int() { return $this->skip_long(); }
+
+  /**
+   * Consumes the bytes of one variable-length long without decoding it.
+   */
+  public function skip_long()
+  {
+    // next_byte() returns a one-byte string; the continuation bit can
+    // only be tested on its ordinal value.
+    $b = ord($this->next_byte());
+    while (0 != ($b & 0x80))
+      $b = ord($this->next_byte());
+  }
+
+  public function skip_float() { return $this->skip(4); }
+
+  public function skip_double() { return $this->skip(8); }
+
+  /**
+   * Reads the length prefix, then skips that many bytes.
+   */
+  public function skip_bytes() { return $this->skip($this->read_long()); }
+
+  public function skip_string() { return $this->skip_bytes(); }
+
+  /**
+   * @param int $len count of bytes to skip
+   * @uses AvroIO::seek()
+   */
+  public function skip($len) { $this->seek($len, AvroIO::SEEK_CUR); }
+
+  /**
+   * @returns int position of pointer in AvroIO instance
+   * @uses AvroIO::tell()
+   */
+  private function tell() { return $this->io->tell(); }
+
+  /**
+   * @param int $offset
+   * @param int $whence
+   * @returns boolean true upon success
+   * @uses AvroIO::seek()
+   */
+  private function seek($offset, $whence)
+  {
+    return $this->io->seek($offset, $whence);
+  }
+}
+

Added: avro/trunk/lang/php/lib/avro/debug.php
URL: http://svn.apache.org/viewvc/avro/trunk/lang/php/lib/avro/debug.php?rev=990860&view=auto
==============================================================================
--- avro/trunk/lang/php/lib/avro/debug.php (added)
+++ avro/trunk/lang/php/lib/avro/debug.php Mon Aug 30 16:50:40 2010
@@ -0,0 +1,194 @@
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @package Avro
+ */
+
+/**
+ * Avro library code debugging functions
+ * @package Avro
+ */
+class AvroDebug
+{
+
+  /**
+   * @var int high debug level
+   */
+  const DEBUG5 = 5;
+  /**
+   * @var int low debug level
+   */
+  const DEBUG1 = 1;
+  /**
+   * @var int current debug level
+   */
+  const DEBUG_LEVEL = self::DEBUG1;
+
+  /**
+   * @param int $debug_level
+   * @returns boolean true if the current debug level
+   *                  ({@link self::DEBUG_LEVEL}) is greater than or
+   *                  equal to the given $debug_level and false otherwise.
+   */
+  static function is_debug($debug_level=self::DEBUG1)
+  {
+    return (self::DEBUG_LEVEL >= $debug_level);
+  }
+
+  /**
+   * Prints a formatted line if {@link is_debug()} accepts $debug_level.
+   *
+   * @param string $format format string for the given arguments. Passed
+   *                       to <code>vprintf</code> with a newline appended.
+   * @param array  $args   array of arguments to pass to vprintf.
+   * @param int    $debug_level debug level at which to print this statement
+   * @returns boolean true
+   */
+  static function debug($format, $args, $debug_level=self::DEBUG1)
+  {
+    if (self::is_debug($debug_level))
+      vprintf($format . "\n", $args);
+    return true;
+  }
+
+  /**
+   * @param string $str
+   * @returns string[] array of hex representation of each byte of $str
+   */
+  static function hex_array($str) { return self::bytes_array($str); }
+
+  /**
+   * @param string $str
+   * @param string $joiner string used to join
+   * @returns string hex-represented bytes of each byte of $str
+   *                 joined by $joiner
+   */
+  static function hex_string($str, $joiner=' ')
+  {
+    return join($joiner, self::hex_array($str));
+  }
+
+  /**
+   * @param string $str
+   * @param string $format format to represent bytes
+   * @returns string[] array of each byte of $str formatted using $format;
+   *                   an empty array if $str is empty.
+   */
+  static function bytes_array($str, $format='x%02x')
+  {
+    $x = array();
+    // Before PHP 8.2 str_split('') returns array(''), and ord('') is 0,
+    // which would report a NUL byte that is not in the input.
+    if ('' === (string) $str)
+      return $x;
+    foreach (str_split($str) as $b)
+      $x []= sprintf($format, ord($b));
+    return $x;
+  }
+
+  /**
+   * @param string $str
+   * @returns string[] array of bytes of $str represented in decimal format ('%3d')
+   */
+  static function dec_array($str) { return self::bytes_array($str, '%3d'); }
+
+  /**
+   * @param string $str
+   * @param string $joiner string to join bytes of $str
+   * @returns string of bytes of $str represented in decimal format
+   * @uses dec_array()
+   */
+  static function dec_string($str, $joiner = ' ')
+  {
+    return join($joiner, self::dec_array($str));
+  }
+
+  /**
+   * @param string $str
+   * @param string $format one of 'ctrl', 'hex', or 'dec'. Visible ASCII
+   *                       characters (32 through 126) represent
+   *                       themselves, preceded by two spaces, in every
+   *                       format. All other bytes are represented
+   *                       according to $format:
+   *                       - ctrl: ASCII control characters represented as
+   *                         text. For example, the null byte is
+   *                         represented as 'NUL'. Bytes above 127 are
+   *                         represented as a zero-padded decimal.
+   *                       - hex: hexadecimal with a leading 'x' ('x%02X')
+   *                       - dec: zero-padded decimal, three digits wide
+   * @returns string[] array of bytes represented in the given format;
+   *                   an empty array if $str is empty.
+   * @throws AvroException if $format is not one of the strings above.
+   */
+  static function ascii_array($str, $format='ctrl')
+  {
+    // Strict comparison: only the three documented strings are accepted.
+    if (!in_array($format, array('ctrl', 'hex', 'dec'), true))
+      throw new AvroException('Unrecognized format specifier');
+
+    $ctrl_chars = array('NUL', 'SOH', 'STX', 'ETX', 'EOT', 'ENQ', 'ACK', 'BEL',
+                        'BS',  'HT',  'LF',  'VT',  'FF',  'CR',  'SO',  'SI',
+                        'DLE', 'DC1', 'DC2', 'DC3', 'DC4', 'NAK', 'SYN', 'ETB',
+                        'CAN', 'EM',  'SUB', 'ESC', 'FS',  'GS',  'RS',  'US');
+    $x = array();
+    // See bytes_array(): avoid a phantom NUL byte for an empty string.
+    if ('' === (string) $str)
+      return $x;
+    foreach (str_split($str) as $b)
+    {
+      $db = ord($b);
+      if (32 <= $db && $db < 127)
+        $x []= "  $b";
+      else if ('hex' == $format)
+        $x []= sprintf("x%02X", $db);
+      else if ('dec' == $format || $db > 127)
+        // 'ctrl' has no names for bytes above 127 and shows them in
+        // decimal as well.
+        $x []= str_pad($db, 3, '0', STR_PAD_LEFT);
+      else if (127 == $db)
+        $x []= 'DEL';
+      else
+        $x []= str_pad($ctrl_chars[$db], 3, ' ', STR_PAD_LEFT);
+    }
+    return $x;
+  }
+
+  /**
+   * @param string $str
+   * @param string $format one of 'ctrl', 'hex', or 'dec'.
+   *                       See {@link self::ascii_array()} for more description
+   * @param string $joiner
+   * @returns string of bytes joined by $joiner
+   * @uses ascii_array()
+   */
+  static function ascii_string($str, $format='ctrl', $joiner = ' ')
+  {
+    return join($joiner, self::ascii_array($str, $format));
+  }
+}

Added: avro/trunk/lang/php/lib/avro/gmp.php
URL: http://svn.apache.org/viewvc/avro/trunk/lang/php/lib/avro/gmp.php?rev=990860&view=auto
==============================================================================
--- avro/trunk/lang/php/lib/avro/gmp.php (added)
+++ avro/trunk/lang/php/lib/avro/gmp.php Mon Aug 30 16:50:40 2010
@@ -0,0 +1,222 @@
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @package Avro
+ */
+
+/**
+ * Methods for handling 64-bit operations using the GMP extension.
+ *
+ * This is a naive and hackish implementation that is intended
+ * to work well enough to support Avro. It has not been tested
+ * beyond what's needed to decode and encode long values.
+ *
+ * @package Avro
+ */
+class AvroGMP {
+
+  /**
+   * @var resource memoized GMP resource for zero
+   */
+  private static $gmp_0;
+
+  /**
+   * @returns resource GMP resource for zero
+   */
+  private static function gmp_0()
+  {
+    if (!isset(self::$gmp_0))
+      self::$gmp_0 = gmp_init('0');
+    return self::$gmp_0;
+  }
+
+  /**
+   * @var resource memoized GMP resource for one (1)
+   */
+  private static $gmp_1;
+
+  /**
+   * @returns resource GMP resource for one (1)
+   */
+  private static function gmp_1()
+  {
+    if (!isset(self::$gmp_1))
+      self::$gmp_1 = gmp_init('1');
+    return self::$gmp_1;
+  }
+
+  /**
+   * @var resource memoized GMP resource for two (2)
+   */
+  private static $gmp_2;
+
+  /**
+   * @returns resource GMP resource for two (2)
+   */
+  private static function gmp_2()
+  {
+    if (!isset(self::$gmp_2))
+      self::$gmp_2 = gmp_init('2');
+    return self::$gmp_2;
+  }
+
+  /**
+   * @var resource memoized GMP resource for 0x7f
+   */
+  private static $gmp_0x7f;
+
+  /**
+   * @returns resource GMP resource for 0x7f
+   */
+  private static function gmp_0x7f()
+  {
+    if (!isset(self::$gmp_0x7f))
+      self::$gmp_0x7f = gmp_init('0x7f');
+    return self::$gmp_0x7f;
+  }
+
+  /**
+   * @var resource memoized GMP resource for 64-bit ~0x7f
+   */
+  private static $gmp_n0x7f;
+
+  /**
+   * @returns resource GMP resource for 64-bit ~0x7f
+   */
+  private static function gmp_n0x7f()
+  {
+    if (!isset(self::$gmp_n0x7f))
+      self::$gmp_n0x7f = gmp_init('0xffffffffffffff80');
+    return self::$gmp_n0x7f;
+  }
+
+  /**
+   * @var resource memoized GMP resource for 64-bits of 1
+   */
+  private static $gmp_0xfs;
+
+  /**
+   * @returns resource GMP resource for 64-bits of 1
+   */
+  private static function gmp_0xfs()
+  {
+    if (!isset(self::$gmp_0xfs))
+      self::$gmp_0xfs = gmp_init('0xffffffffffffffff');
+    return self::$gmp_0xfs;
+  }
+
+  /**
+   * Computes -(2^64 - $g), i.e. $g - 2^64.
+   *
+   * The result differs from $g by exactly 2^64, so its low 64 bits are
+   * the same as those of $g. {@link shift_left()} applies this to
+   * negative inputs before multiplying and masking to 64 bits.
+   *
+   * @param resource|int|string $g GMP number
+   * @returns resource GMP number equal to -(2^64 - $g)
+   */
+  static function gmp_twos_complement($g)
+  {
+    return gmp_neg(gmp_sub(gmp_pow(self::gmp_2(), 64), $g));
+  }
+
+  /**
+   * Shifts $g left by $shift bits within a signed 64-bit word: the
+   * product is truncated to 64 bits and, if bit 63 of the truncated
+   * value is set, it is reinterpreted as a negative number.
+   *
+   * @internal Only works up to shift 63 (doesn't wrap bits around).
+   *           A $shift of 0 returns $g itself, unconverted.
+   * @param resource|int|string $g
+   * @param int $shift number of bits to shift left
+   * @returns resource $g shifted left
+   */
+  static function shift_left($g, $shift)
+  {
+    if (0 == $shift)
+      return $g;
+
+    if (0 > gmp_sign($g))
+      $g = self::gmp_twos_complement($g);
+
+    // Multiply by 2^$shift and keep only the low 64 bits.
+    $m = gmp_mul($g, gmp_pow(self::gmp_2(), $shift));
+    $m = gmp_and($m, self::gmp_0xfs());
+    // Bit 63 set: map the unsigned 64-bit pattern onto its signed value
+    // (-((~m & 0xff..ff) + 1)).
+    if (gmp_testbit($m, 63))
+      $m = gmp_neg(gmp_add(gmp_and(gmp_com($m), self::gmp_0xfs()),
+                           self::gmp_1()));
+    return $m;
+  }
+
+  /**
+   * Arithmetic right shift: non-negative values are divided by
+   * 2^$shift, negative values are shifted as 64-bit two's complement
+   * patterns with the vacated high bits filled with ones.
+   *
+   * @internal A $shift of 0 returns $g itself, unconverted.
+   * @param resource|int|string $g
+   * @param int $shift number of bits to shift right
+   * @returns resource $g shifted right $shift bits
+   */
+  static function shift_right($g, $shift)
+  {
+    if (0 == $shift)
+      return $g;
+
+    if (0 <= gmp_sign($g))
+      $m = gmp_div($g, gmp_pow(self::gmp_2(), $shift));
+    else // negative
+    {
+      // Work on the unsigned 64-bit pattern of $g.
+      $g = gmp_and($g, self::gmp_0xfs());
+      $m = gmp_div($g, gmp_pow(self::gmp_2(), $shift));
+      $m = gmp_and($m, self::gmp_0xfs());
+
+      // Sign-extend. The lower bound is clamped at bit 0 so that a
+      // $shift of 64 or more never asks gmp_setbit() for a negative
+      // bit index.
+      $lowest_fill_bit = max(0, 63 - $shift);
+      for ($i = 63; $i >= $lowest_fill_bit; $i--)
+        gmp_setbit($m, $i);
+
+      // Map the unsigned 64-bit pattern back onto its signed value.
+      $m = gmp_neg(gmp_add(gmp_and(gmp_com($m), self::gmp_0xfs()),
+                           self::gmp_1()));
+    }
+
+    return $m;
+  }
+
+  /**
+   * Zig-zag encodes $n and emits it as a variable-length sequence of
+   * 7-bit groups, least significant group first, with the high bit of
+   * every byte except the last one set.
+   *
+   * @param int|str $n integer (or string representation of integer) to encode
+   * @return string $bytes of the long $n encoded per the Avro spec
+   */
+  static function encode_long($n)
+  {
+    $g = gmp_init($n);
+    $g = gmp_xor(self::shift_left($g, 1),
+                 self::shift_right($g, 63));
+    // The zig-zag value is an *unsigned* 64-bit quantity. When its bit 63
+    // is set (n >= 2^62 or n < -2^62) the xor above is negative; left
+    // that way, the arithmetic shift in the loop converges to -1 and the
+    // loop never ends. Masking makes $g non-negative, for which
+    // shift_right() is a plain division, i.e. a logical shift.
+    $g = gmp_and($g, self::gmp_0xfs());
+    $bytes = '';
+    while (0 != gmp_cmp(self::gmp_0(), gmp_and($g, self::gmp_n0x7f())))
+    {
+      $bytes .= chr(gmp_intval(gmp_and($g, self::gmp_0x7f())) | 0x80);
+      $g = self::shift_right($g, 7);
+    }
+    $bytes .= chr(gmp_intval($g));
+    return $bytes;
+  }
+
+  /**
+   * Reverses {@link encode_long()}: reassembles the 7-bit groups into an
+   * unsigned 64-bit value and undoes the zig-zag mapping.
+   *
+   * @param int[] $bytes array of ascii codes of bytes to decode
+   * @return string representation of decoded long.
+   */
+  static function decode_long_from_array($bytes)
+  {
+    $b = array_shift($bytes);
+    $g = gmp_init($b & 0x7f);
+    $shift = 7;
+    while (0 != ($b & 0x80))
+    {
+      $b = array_shift($bytes);
+      // Accumulate as an unsigned 64-bit value: the signed shift_left()
+      // would turn a group landing on bit 63 into a negative number and
+      // corrupt the result for the largest magnitudes.
+      $group = gmp_mul(gmp_init($b & 0x7f), gmp_pow(self::gmp_2(), $shift));
+      $g = gmp_or($g, gmp_and($group, self::gmp_0xfs()));
+      $shift += 7;
+    }
+    // Zig-zag decode: (g >>> 1) ^ -(g & 1). $g is non-negative here, so
+    // the division is the required logical (not arithmetic) shift.
+    $val = gmp_xor(gmp_div($g, self::gmp_2()),
+                   gmp_neg(gmp_and($g, self::gmp_1())));
+    return gmp_strval($val);
+  }
+
+}

Added: avro/trunk/lang/php/lib/avro/io.php
URL: http://svn.apache.org/viewvc/avro/trunk/lang/php/lib/avro/io.php?rev=990860&view=auto
==============================================================================
--- avro/trunk/lang/php/lib/avro/io.php (added)
+++ avro/trunk/lang/php/lib/avro/io.php Mon Aug 30 16:50:40 2010
@@ -0,0 +1,493 @@
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Avro IO object classes
+ * @package Avro
+ */
+
+/**
+ * Exceptions associated with AvroIO instances.
+ *
+ * Adds no members of its own; it only gives IO failures a distinct
+ * type derived from AvroException.
+ * @package Avro
+ */
+class AvroIOException extends AvroException {}
+
+/**
+ * Barebones IO base class to provide common interface for file and string
+ * access within the Avro classes.
+ *
+ * @package Avro
+ */
+class AvroIO
+{
+
+  /**
+   * @var string general read mode
+   */
+  const READ_MODE = 'r';
+  /**
+   * @var string general write mode.
+   */
+  const WRITE_MODE = 'w';
+
+  /**
+   * @var int set position to current index + $offset bytes
+   */
+  const SEEK_CUR = SEEK_CUR;
+  /**
+   * @var int set position equal to $offset bytes
+   */
+  const SEEK_SET = SEEK_SET;
+  /**
+   * @var int set position to end of file + $offset bytes
+   */
+  const SEEK_END = SEEK_END;
+
+  /**
+   * Read $len bytes from AvroIO instance
+   *
+   * This base implementation only throws.
+   * @param int $len
+   * @return string bytes read
+   * @throws AvroNotImplementedException always, in this base class
+   */
+  public function read($len)
+  {
+    throw new AvroNotImplementedException('Not implemented');
+  }
+
+  /**
+   * Append bytes to this buffer. (Nothing more is needed to support Avro.)
+   *
+   * This base implementation only throws.
+   * @param str $arg bytes to write
+   * @returns int count of bytes written.
+   * @throws AvroIOException if $arg is not a string value.
+   * @throws AvroNotImplementedException always, in this base class
+   */
+  public function write($arg)
+  {
+    throw new AvroNotImplementedException('Not implemented');
+  }
+
+  /**
+   * Return byte offset within AvroIO instance
+   *
+   * This base implementation only throws.
+   * @return int
+   * @throws AvroNotImplementedException always, in this base class
+   */
+  public function tell()
+  {
+    throw new AvroNotImplementedException('Not implemented');
+  }
+
+  /**
+   * Set the position indicator. The new position, measured in bytes
+   * from the beginning of the file, is obtained by adding $offset to
+   * the position specified by $whence.
+   *
+   * This base implementation only throws.
+   *
+   * @param int $offset
+   * @param int $whence one of AvroIO::SEEK_SET, AvroIO::SEEK_CUR,
+   *                    or AvroIO::SEEK_END
+   * @returns boolean true
+   *
+   * @throws AvroIOException
+   * @throws AvroNotImplementedException always, in this base class
+   */
+  public function seek($offset, $whence=self::SEEK_SET)
+  {
+    throw new AvroNotImplementedException('Not implemented');
+  }
+
+  /**
+   * Flushes any buffered data to the AvroIO object.
+   *
+   * This base implementation only throws.
+   * @returns boolean true upon success.
+   * @throws AvroNotImplementedException always, in this base class
+   */
+  public function flush()
+  {
+    throw new AvroNotImplementedException('Not implemented');
+  }
+
+  /**
+   * Returns whether or not the current position is at the end of this
+   * AvroIO instance.
+   *
+   * Note is_eof() is <b>not</b> like eof in C or feof in PHP:
+   * it returns TRUE if the *next* read would be end of file,
+   * rather than if the *most recent* read read end of file.
+   *
+   * This base implementation only throws.
+   * @returns boolean true if at the end of file, and false otherwise
+   * @throws AvroNotImplementedException always, in this base class
+   */
+  public function is_eof()
+  {
+    throw new AvroNotImplementedException('Not implemented');
+  }
+
+  /**
+   * Closes this AvroIO instance.
+   *
+   * This base implementation only throws.
+   * @throws AvroNotImplementedException always, in this base class
+   */
+  public function close()
+  {
+    throw new AvroNotImplementedException('Not implemented');
+  }
+
+}
+
+/**
+ * AvroIO wrapper for string access
+ * @package Avro
+ */
+class AvroStringIO extends AvroIO
+{
+  /**
+   * @var string bytes held by this buffer.
+   * @internal Kept as one string rather than a str_split() array:
+   *           str_split('') yields array('') before PHP 8.2, which made
+   *           an empty buffer report a length of 1.
+   */
+  private $buffer;
+  /**
+   * @var int  current position in string
+   */
+  private $current_index;
+  /**
+   * @var boolean whether or not the string is closed.
+   */
+  private $is_closed;
+
+  /**
+   * @param string $str initial value of AvroStringIO buffer. Regardless
+   *                    of the initial value, the pointer is set to the
+   *                    beginning of the buffer.
+   * @throws AvroIOException if a non-string value is passed as $str
+   */
+  public function __construct($str = '')
+  {
+    $this->is_closed = false;
+    $this->buffer = '';
+    $this->current_index = 0;
+
+    if (is_string($str))
+      $this->buffer = $str;
+    else
+      throw new AvroIOException(
+        sprintf('constructor argument must be a string: %s', gettype($str)));
+  }
+
+  /**
+   * Append bytes to this buffer.
+   * (Nothing more is needed to support Avro.)
+   * @param str $arg bytes to write
+   * @returns int count of bytes written.
+   * @throws AvroIOException if $arg is not a string value or if the
+   *                         buffer is closed.
+   */
+  public function write($arg)
+  {
+    $this->check_closed();
+    if (is_string($arg))
+      return $this->append_str($arg);
+    throw new AvroIOException(
+      sprintf('write argument must be a string: (%s) %s',
+              gettype($arg), var_export($arg, true)));
+  }
+
+  /**
+   * Reads up to $len bytes starting at the current position and moves
+   * the position forward. When fewer than $len bytes are available the
+   * position is set to the end of the buffer.
+   *
+   * @param int $len count of bytes to read.
+   * @returns string bytes read from buffer
+   * @throws AvroIOException if $len is negative or the buffer is closed.
+   */
+  public function read($len)
+  {
+    $this->check_closed();
+    if (0 > $len)
+      throw new AvroIOException(
+        sprintf("Invalid length value passed to read: %d", $len));
+
+    // substr() returns false instead of '' on older PHP versions when
+    // the start offset is at or past the end; normalize to a string.
+    $read = (string) substr($this->buffer, $this->current_index, $len);
+    if (strlen($read) < $len)
+      $this->current_index = $this->length();
+    else
+      $this->current_index += $len;
+    return $read;
+  }
+
+  /**
+   * Moves the current position. Seeking before the beginning of the
+   * buffer is rejected; seeking past the end is not.
+   *
+   * @param int $offset
+   * @param int $whence one of AvroIO::SEEK_SET, AvroIO::SEEK_CUR,
+   *                    or AvroIO::SEEK_END
+   * @returns boolean true if successful
+   * @throws AvroIOException if the seek failed.
+   */
+  public function seek($offset, $whence=self::SEEK_SET)
+  {
+    if (!is_int($offset))
+      throw new AvroIOException('Seek offset must be an integer.');
+    // Prevent seeking before BOF
+    switch ($whence)
+    {
+      case self::SEEK_SET:
+        if (0 > $offset)
+          throw new AvroIOException('Cannot seek before beginning of file.');
+        $this->current_index = $offset;
+        break;
+      case self::SEEK_CUR:
+        // The target is current_index + $offset; testing against $whence
+        // (a non-negative constant) would never reject anything.
+        if (0 > $this->current_index + $offset)
+          throw new AvroIOException('Cannot seek before beginning of file.');
+        $this->current_index += $offset;
+        break;
+      case self::SEEK_END:
+        if (0 > $this->length() + $offset)
+          throw new AvroIOException('Cannot seek before beginning of file.');
+        $this->current_index = $this->length() + $offset;
+        break;
+      default:
+        throw new AvroIOException(sprintf('Invalid seek whence %d', $whence));
+    }
+
+    return true;
+  }
+
+  /**
+   * @returns int
+   * @see AvroIO::tell()
+   */
+  public function tell() { return $this->current_index; }
+
+  /**
+   * @returns boolean
+   * @see AvroIO::is_eof()
+   */
+  public function is_eof()
+  {
+    return ($this->current_index >= $this->length());
+  }
+
+  /**
+   * No-op provided for compatibility with AvroIO interface.
+   * @returns boolean true
+   */
+  public function flush() { return true; }
+
+  /**
+   * Marks this buffer as closed.
+   * @returns boolean true
+   * @throws AvroIOException if the buffer is already closed.
+   */
+  public function close()
+  {
+    $this->check_closed();
+    $this->is_closed = true;
+    return true;
+  }
+
+  /**
+   * @throws AvroIOException if the buffer is closed.
+   */
+  private function check_closed()
+  {
+    if ($this->is_closed())
+      throw new AvroIOException('Buffer is closed');
+  }
+
+  /**
+   * Appends bytes to the end of this buffer and advances the current
+   * position by the number of bytes appended.
+   * @param string $str
+   * @returns integer count of bytes written.
+   */
+  private function append_str($str)
+  {
+    $this->check_closed();
+    $len = strlen($str);
+    $this->buffer .= $str;
+    $this->current_index += $len;
+    return $len;
+  }
+
+  /**
+   * Truncates the buffer to 0 bytes and returns the pointer
+   * to the beginning of the buffer.
+   * @returns boolean true
+   */
+  public function truncate()
+  {
+    $this->check_closed();
+    $this->buffer = '';
+    $this->current_index = 0;
+    return true;
+  }
+
+  /**
+   * @returns int count of bytes in the buffer
+   */
+  public function length() { return strlen($this->buffer); }
+
+  /**
+   * @returns string
+   */
+  public function __toString() { return $this->buffer; }
+
+
+  /**
+   * @returns string
+   * @uses self::__toString()
+   */
+  public function string() { return $this->__toString(); }
+
+  /**
+   * @returns boolean true if this buffer is closed and false
+   *                       otherwise.
+   */
+  public function is_closed() { return $this->is_closed; }
+}
+
+/**
+ * AvroIO wrapper for PHP file access functions
+ * @package Avro
+ */
+class AvroFile extends AvroIO
+{
+  /**
+   * @var string mode string handed to fopen() for reading. Used internally.
+   */
+  const FOPEN_READ_MODE = 'rb';
+
+  /**
+   * @var string mode string handed to fopen() for writing. Used internally.
+   */
+  const FOPEN_WRITE_MODE = 'wb';
+
+  /**
+   * @var string path passed to the constructor
+   */
+  private $file_path;
+
+  /**
+   * @var resource file handle for AvroFile instance
+   */
+  private $file_handle;
+
+  /**
+   * Opens $file_path for reading or for writing.
+   *
+   * No check is made on $file_path itself (existence, type, ...); a path
+   * that cannot be opened surfaces as a failed fopen().
+   *
+   * @param string $file_path
+   * @param string $mode AvroIO::READ_MODE or AvroIO::WRITE_MODE
+   * @throws AvroIOException if $mode is neither of the two modes or if
+   *                         the file could not be opened.
+   */
+  public function __construct($file_path, $mode = self::READ_MODE)
+  {
+    $this->file_path = $file_path;
+
+    // Loose comparisons, write mode tested first: the same matching
+    // rules and order a switch over $mode would apply.
+    if (self::WRITE_MODE == $mode)
+    {
+      $fopen_mode = self::FOPEN_WRITE_MODE;
+      $failure = 'Could not open file for writing';
+    }
+    elseif (self::READ_MODE == $mode)
+    {
+      $fopen_mode = self::FOPEN_READ_MODE;
+      $failure = 'Could not open file for reading';
+    }
+    else
+    {
+      throw new AvroIOException(
+        sprintf("Only modes '%s' and '%s' allowed. You provided '%s'.",
+                self::READ_MODE, self::WRITE_MODE, $mode));
+    }
+
+    $this->file_handle = fopen($this->file_path, $fopen_mode);
+    if (false == $this->file_handle)
+    {
+      throw new AvroIOException($failure);
+    }
+  }
+
+  /**
+   * @param string $str bytes to write
+   * @returns int count of bytes written
+   * @throws AvroIOException if write failed.
+   */
+  public function write($str)
+  {
+    $written = fwrite($this->file_handle, $str);
+    if (false !== $written)
+    {
+      return $written;
+    }
+    throw new AvroIOException('Could not write to file');
+  }
+
+  /**
+   * @param int $len count of bytes to read.
+   * @returns string bytes read; the empty string when $len is zero
+   * @throws AvroIOException if length value is negative or if the read failed
+   */
+  public function read($len)
+  {
+    if (0 > $len)
+    {
+      throw new AvroIOException(
+        sprintf("Invalid length value passed to read: %d", $len));
+    }
+
+    // Nothing requested: answer without touching the file handle.
+    if (0 == $len)
+    {
+      return '';
+    }
+
+    $chunk = fread($this->file_handle, $len);
+    if (false === $chunk)
+    {
+      throw new AvroIOException('Could not read from file');
+    }
+    return $chunk;
+  }
+
+  /**
+   * @returns int current position within the file
+   * @throws AvroIOException if tell failed.
+   */
+  public function tell()
+  {
+    $offset = ftell($this->file_handle);
+    if (false !== $offset)
+    {
+      return $offset;
+    }
+    throw new AvroIOException('Could not execute tell on reader');
+  }
+
+  /**
+   * @param int $offset
+   * @param int $whence
+   * @returns boolean true upon success
+   * @throws AvroIOException if seek failed.
+   * @see AvroIO::seek()
+   */
+  public function seek($offset, $whence = SEEK_SET)
+  {
+    // Note: does not catch seeking beyond end of file
+    if (-1 === fseek($this->file_handle, $offset, $whence))
+    {
+      throw new AvroIOException(
+        sprintf("Could not execute seek (offset = %d, whence = %d)",
+                $offset, $whence));
+    }
+    return true;
+  }
+
+  /**
+   * Closes the file.
+   * @returns boolean true if successful.
+   * @throws AvroIOException if there was an error closing the file.
+   */
+  public function close()
+  {
+    $closed = fclose($this->file_handle);
+    if (false === $closed)
+    {
+      throw new AvroIOException('Error closing file.');
+    }
+    return $closed;
+  }
+
+  /**
+   * Probes for end of file by reading one byte; when the probe did not
+   * hit the end, the position is moved back by one byte.
+   *
+   * @returns boolean true if the pointer is at the end of the file,
+   *                  and false otherwise.
+   * @see AvroIO::is_eof() as behavior differs from feof()
+   */
+  public function is_eof()
+  {
+    $this->read(1);
+    $at_end = feof($this->file_handle);
+    if (!$at_end)
+    {
+      // Undo the one-byte probe.
+      $this->seek(-1, self::SEEK_CUR);
+    }
+    return $at_end;
+  }
+
+  /**
+   * @returns boolean true if the flush was successful.
+   * @throws AvroIOException if there was an error flushing the file.
+   */
+  public function flush()
+  {
+    if (false === fflush($this->file_handle))
+    {
+      throw new AvroIOException('Could not flush file.');
+    }
+    return true;
+  }
+
+}