You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by th...@apache.org on 2011/03/19 19:14:03 UTC

svn commit: r1083246 - in /avro/trunk: CHANGES.txt lang/c++/CMakeLists.txt lang/c++/api/Generic.hh lang/c++/impl/Generic.cc lang/c++/parser/AvroLex.ll lang/c++/test/CodecTests.cc

Author: thiru
Date: Sat Mar 19 18:14:03 2011
New Revision: 1083246

URL: http://svn.apache.org/viewvc?rev=1083246&view=rev
Log:
AVRO-781. Generic data support in C++

Added:
    avro/trunk/lang/c++/api/Generic.hh
    avro/trunk/lang/c++/impl/Generic.cc
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/c++/CMakeLists.txt
    avro/trunk/lang/c++/parser/AvroLex.ll
    avro/trunk/lang/c++/test/CodecTests.cc

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1083246&r1=1083245&r2=1083246&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Sat Mar 19 18:14:03 2011
@@ -237,6 +237,8 @@ Avro 1.5.0 (10 March 2011)
     AVRO-754. Java: Permit passing custom channel factory to NettyTransceiver.
     (Bruno Dumon via cutting)
 
+    AVRO-781. Generic data support in C++. (thiru)
+
   BUG FIXES
 
     AVRO-764. Java: Bug in BinaryData.compare() with offset comparison.

Modified: avro/trunk/lang/c++/CMakeLists.txt
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/CMakeLists.txt?rev=1083246&r1=1083245&r2=1083246&view=diff
==============================================================================
--- avro/trunk/lang/c++/CMakeLists.txt (original)
+++ avro/trunk/lang/c++/CMakeLists.txt Sat Mar 19 18:14:03 2011
@@ -37,12 +37,14 @@ project (Avro-cpp)
 find_package (Boost 1.38 COMPONENTS regex filesystem system)
 
 include_directories (api ${BUILD_DIRECTORY})
-add_library (avrocpp SHARED impl/Compiler.cc
+
+add_library (avrocpp SHARED
         impl/Compiler.cc impl/CompilerNode.cc impl/Node.cc
         impl/NodeImpl.cc impl/Resolver.cc impl/ResolverSchema.cc impl/Schema.cc
         impl/Types.cc impl/Validator.cc impl/ValidSchema.cc impl/Zigzag.cc
         impl/BinaryEncoder.cc impl/BinaryDecoder.cc
         impl/Stream.cc impl/FileStream.cc
+        impl/Generic.cc
         impl/parsing/Symbol.cc
         impl/parsing/ValidatingCodec.cc
         impl/parsing/JsonCodec.cc
@@ -86,7 +88,7 @@ add_custom_command (OUTPUT ${BUILD_DIREC
     
 macro (test name)
     add_executable (${name} test/${name}.cc)
-    target_link_libraries (${name} avrocpp boost_regex-mt)
+    target_link_libraries (${name} avrocpp ${Boost_LIBRARIES})
 endmacro (test)
 
 test(buffertest)
@@ -100,7 +102,7 @@ target_link_libraries (StreamTests avroc
 
 add_executable (testgentest test/testgen.cc)
 add_dependencies (testgentest testgen testgen2)
-target_link_libraries (testgentest avrocpp boost_regex-mt)
+target_link_libraries (testgentest avrocpp ${Boost_LIBRARIES})
 
 include (InstallRequiredSystemLibraries)
 

Added: avro/trunk/lang/c++/api/Generic.hh
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/api/Generic.hh?rev=1083246&view=auto
==============================================================================
--- avro/trunk/lang/c++/api/Generic.hh (added)
+++ avro/trunk/lang/c++/api/Generic.hh Sat Mar 19 18:14:03 2011
@@ -0,0 +1,255 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef avro_Generic_hh__
+#define avro_Generic_hh__
+
+#include <vector>
+#include <map>
+#include <string>
+
+#include <boost/utility.hpp>
+#include <boost/any.hpp>
+
+#include "Node.hh"
+#include "Types.hh"
+#include "Encoder.hh"
+#include "Decoder.hh"
+#include "ValidSchema.hh"
+
+namespace avro {
+
+/**
+ * A dynamically typed Avro value: a Type tag plus a boost::any payload.
+ *
+ * The public constructors pair each tag with a payload type: bool, int32_t,
+ * int64_t, float, double, std::string and std::vector<uint8_t> for the
+ * primitive types; the NodePtr constructor builds the payload from a schema
+ * node. A default-constructed datum is AVRO_NULL and has no payload.
+ *
+ * The single-argument constructors are deliberately implicit: Generic.cc
+ * assigns plain values and schema nodes directly to a datum.
+ */
+class GenericDatum {
+    Type type_;
+    boost::any value_;
+
+    GenericDatum(Type t) : type_(t) { }
+
+    template <typename T>
+    GenericDatum(Type t, const T& v) : type_(t), value_(v) { }
+
+public:
+    /// Returns the Avro type tag of this datum.
+    Type type() const {
+        return type_;
+    }
+
+    /**
+     * Returns the payload as a const T.
+     * Throws Exception if the payload is not a T; the pointer form of
+     * boost::any_cast yields a null pointer in that case, which must not
+     * be dereferenced.
+     */
+    template<typename T>
+    const T& value() const {
+        const T* p = boost::any_cast<T>(&value_);
+        if (p == 0) {
+            throw Exception("GenericDatum does not hold the requested type");
+        }
+        return *p;
+    }
+
+    /**
+     * Returns the payload as a modifiable T.
+     * Throws Exception if the payload is not a T.
+     */
+    template<typename T>
+    T& value() {
+        T* p = boost::any_cast<T>(&value_);
+        if (p == 0) {
+            throw Exception("GenericDatum does not hold the requested type");
+        }
+        return *p;
+    }
+
+    /// Makes a new AVRO_NULL datum.
+    GenericDatum() : type_(AVRO_NULL) { }
+
+    /// Makes a new AVRO_BOOL datum whose value is of type bool.
+    GenericDatum(bool v) : type_(AVRO_BOOL), value_(v) { }
+
+    /// Makes a new AVRO_INT datum whose value is of type int32_t.
+    GenericDatum(int32_t v) : type_(AVRO_INT), value_(v) { }
+
+    /// Makes a new AVRO_LONG datum whose value is of type int64_t.
+    GenericDatum(int64_t v) : type_(AVRO_LONG), value_(v) { }
+
+    /// Makes a new AVRO_FLOAT datum whose value is of type float.
+    GenericDatum(float v) : type_(AVRO_FLOAT), value_(v) { }
+
+    /// Makes a new AVRO_DOUBLE datum whose value is of type double.
+    GenericDatum(double v) : type_(AVRO_DOUBLE), value_(v) { }
+
+    /// Makes a new AVRO_STRING datum whose value is of type std::string.
+    GenericDatum(const std::string& v) : type_(AVRO_STRING), value_(v) { }
+
+    /// Makes a new AVRO_BYTES datum whose value is of type
+    /// std::vector<uint8_t>.
+    GenericDatum(const std::vector<uint8_t>& v) :
+        type_(AVRO_BYTES), value_(v) { }
+
+    /// Makes a new datum whose type and default payload are taken from
+    /// the given schema node.
+    GenericDatum(const NodePtr& schema);
+};
+
+/**
+ * Common base of the generic values that are built from a schema node.
+ * It stores that node and exposes it through schema().
+ */
+class GenericContainer {
+    const NodePtr schema_;
+protected:
+    /// Stores s as the schema of this container.
+    GenericContainer(const NodePtr& s) : schema_(s) { }
+
+    // NOTE(review): Generic.cc as added alongside this header defines
+    // GenericContainer::assertType() but no GenericContainer::assertSameType();
+    // confirm a definition exists, since GenericRecord::setFieldAt() calls it.
+    static void assertSameType(const GenericDatum& v, const NodePtr& n);
+    /// Throws Exception(message) unless schema->type() equals type.
+    static void assertType(const NodePtr& schema, Type type,
+        const char* message);
+public:
+    /// Returns the schema for this object
+    const NodePtr& schema() const {
+        return schema_;
+    }
+};
+
+/**
+ * Generic value for an Avro record: a vector holding one GenericDatum per
+ * field.
+ */
+class GenericRecord : public GenericContainer {
+    std::vector<GenericDatum> fields_;
+public:
+    /// Builds a record for the given schema node.
+    GenericRecord(const NodePtr& schema);
+
+    /// Number of field slots held by this record.
+    size_t fieldCount() const { return fields_.size(); }
+
+    /// Read-only access to the field at pos; pos is not range-checked.
+    const GenericDatum& fieldAt(size_t pos) const { return fields_[pos]; }
+
+    /// Modifiable access to the field at pos; pos is not range-checked.
+    GenericDatum& fieldAt(size_t pos) { return fields_[pos]; }
+
+    /// Checks v against the schema leaf at pos, then stores it there.
+    void setFieldAt(size_t pos, const GenericDatum& v) {
+        const NodePtr& fieldSchema = schema()->leafAt(pos);
+        assertSameType(v, fieldSchema);
+        fields_[pos] = v;
+    }
+};
+
+/**
+ * Generic value for an Avro array: a vector of GenericDatum items.
+ */
+class GenericArray : public GenericContainer {
+public:
+    /// The container type that holds the array items.
+    typedef std::vector<GenericDatum> Value;
+
+    /// Builds an empty array; throws Exception if schema is not an
+    /// AVRO_ARRAY node. Uses the shared assertType() helper, as GenericMap
+    /// does.
+    GenericArray(const NodePtr& schema) : GenericContainer(schema) {
+        assertType(schema, AVRO_ARRAY, "Schema is not an array");
+    }
+
+    /// Read-only access to the items.
+    const Value& value() const {
+        return value_;
+    }
+
+    /// Modifiable access to the items.
+    Value& value() {
+        return value_;
+    }
+private:
+    Value value_;
+};
+
+/**
+ * Generic value for an Avro map, kept as a vector of (key, datum) pairs.
+ */
+class GenericMap : public GenericContainer {
+public:
+    /// The container type that holds the map entries.
+    typedef std::vector<std::pair<std::string, GenericDatum> > Value;
+
+    /// Builds an empty map; throws Exception if schema is not an AVRO_MAP
+    /// node.
+    GenericMap(const NodePtr& schema) : GenericContainer(schema) {
+        assertType(schema, AVRO_MAP, "Schema is not a map");
+    }
+
+    /// Read-only access to the entries.
+    const Value& value() const { return value_; }
+
+    /// Modifiable access to the entries.
+    Value& value() { return value_; }
+private:
+    Value value_;
+};
+
+/**
+ * Generic value for an Avro enum, stored as the index of the selected
+ * symbol among the names of the schema node.
+ */
+class GenericEnum : public GenericContainer {
+    size_t value_;
+public:
+    /// Builds an enum for the given schema with index 0 selected.
+    GenericEnum(const NodePtr& schema) : GenericContainer(schema), value_(0) {
+    }
+
+    /// Returns the symbol at index n; throws Exception if the schema does
+    /// not have that many symbols. Const because it only reads the schema.
+    const std::string& symbol(size_t n) const {
+        if (n < schema()->names()) {
+            return schema()->nameAt(n);
+        }
+        throw Exception("Not as many symbols");
+    }
+
+    /// Returns the index of the given symbol; throws Exception if the
+    /// schema has no such symbol.
+    size_t index(const std::string& symbol) const {
+        size_t result;
+        if (schema()->nameIndex(symbol, result)) {
+            return result;
+        }
+        throw Exception("No such symbol");
+    }
+
+    /// Selects the given symbol and returns its index; throws Exception
+    /// if the schema has no such symbol.
+    size_t set(const std::string& symbol) {
+        return value_ = index(symbol);
+    }
+
+    /// Selects the symbol at index n; throws Exception if the schema does
+    /// not have that many symbols.
+    void set(size_t n) {
+        if (n < schema()->names()) {
+            value_ = n;
+            return;
+        }
+        throw Exception("Not as many symbols");
+    }
+
+    /// Returns the index of the currently selected symbol.
+    size_t value() const {
+        return value_;
+    }
+
+    /// Returns the currently selected symbol.
+    const std::string& symbol() const {
+        return schema()->nameAt(value_);
+    }
+};
+
+/**
+ * Generic value for an Avro fixed: a byte vector sized from the schema.
+ */
+class GenericFixed : public GenericContainer {
+    std::vector<uint8_t> value_;
+public:
+    /// Builds a fixed value holding schema->fixedSize() zero bytes.
+    GenericFixed(const NodePtr& schema) :
+        GenericContainer(schema), value_(schema->fixedSize()) {
+    }
+
+    /// Read-only access to the bytes.
+    const std::vector<uint8_t>& value() const { return value_; }
+
+    /// Modifiable access to the bytes.
+    std::vector<uint8_t>& value() { return value_; }
+};
+
+
+/**
+ * Reads GenericDatum values from a decoder according to a schema.
+ * Not copyable.
+ */
+class GenericReader : boost::noncopyable {
+    const ValidSchema schema_;
+    const bool isResolving_;
+    const DecoderPtr decoder_;
+
+    /// Recursive worker: decodes one value for schema node n from d into
+    /// datum. isResolving tells it that d is a resolving decoder.
+    static void read(GenericDatum& datum, const NodePtr& n, Decoder& d,
+        bool isResolving);
+public:
+    /// Reads data for schema s from the given decoder, without resolution.
+    GenericReader(const ValidSchema& s, const DecoderPtr& decoder);
+    /// Reads data written with writerSchema and presents it according to
+    /// readerSchema; the decoder is wrapped in a resolving decoder.
+    GenericReader(const ValidSchema& writerSchema,
+        const ValidSchema& readerSchema, const DecoderPtr& decoder);
+
+    /// Decodes one value for the root of the schema into datum.
+    void read(GenericDatum& datum) const;
+};
+
+
+/**
+ * Writes GenericDatum values to an encoder according to a schema.
+ * Not copyable.
+ */
+class GenericWriter : boost::noncopyable {
+    const ValidSchema schema_;
+    const EncoderPtr encoder_;
+
+    /// Recursive worker: encodes datum for schema node n using e.
+    static void write(const GenericDatum& datum, const NodePtr& n, Encoder& e);
+public:
+    /// Writes data for schema s to the given encoder.
+    GenericWriter(const ValidSchema& s, const EncoderPtr& encoder);
+
+    /// Encodes datum against the root of the schema.
+    void write(const GenericDatum& datum) const;
+};
+}   // namespace avro
+#endif
+

Added: avro/trunk/lang/c++/impl/Generic.cc
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/impl/Generic.cc?rev=1083246&view=auto
==============================================================================
--- avro/trunk/lang/c++/impl/Generic.cc (added)
+++ avro/trunk/lang/c++/impl/Generic.cc Sat Mar 19 18:14:03 2011
@@ -0,0 +1,425 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Generic.hh"
+#include "NodeImpl.hh"
+#include <sstream>
+
+namespace avro {
+
+using std::string;
+using std::vector;
+using std::ostringstream;
+
+typedef vector<uint8_t> bytes;
+
+/// Throws Exception(message) when the type of the given schema node is
+/// not the expected type; otherwise does nothing.
+void GenericContainer::assertType(const NodePtr& schema, Type type,
+    const char* message)
+{
+    const bool matches = (schema->type() == type);
+    if (matches) {
+        return;
+    }
+    throw Exception(message);
+}
+
+/**
+ * Makes a datum whose type is that of the given schema node and whose
+ * payload is the default value for that type.
+ *
+ * A symbolic node is first resolved to the node it refers to, so the datum
+ * takes the type of the referenced schema and any container payload is built
+ * from that node.
+ *
+ * Throws Exception for a union node and for any type not listed below.
+ */
+GenericDatum::GenericDatum(const NodePtr& schema) : type_(schema->type())
+{
+    NodePtr sc = schema;
+    if (type_ == AVRO_SYMBOLIC) {
+        sc = static_cast<NodeSymbolic&>(*schema).getNode();
+        type_ = sc->type();
+    }
+    switch (type_) {
+        case AVRO_NULL:
+            break;
+        case AVRO_BOOL:
+            value_ = bool();
+            break;
+        case AVRO_INT:
+            value_ = int32_t();
+            break;
+        case AVRO_LONG:
+            value_ = int64_t();
+            break;
+        case AVRO_FLOAT:
+            value_ = float();
+            break;
+        case AVRO_DOUBLE:
+            value_ = double();
+            break;
+        case AVRO_STRING:
+            value_ = string();
+            break;
+        case AVRO_BYTES:
+            value_ = vector<uint8_t>();
+            break;
+        case AVRO_FIXED:
+            value_ = GenericFixed(sc);
+            break;
+        case AVRO_RECORD:
+            value_ = GenericRecord(sc);
+            break;
+        case AVRO_ENUM:
+            value_ = GenericEnum(sc);
+            break;
+        case AVRO_ARRAY:
+            value_ = GenericArray(sc);
+            break;
+        case AVRO_MAP:
+            value_ = GenericMap(sc);
+            break;
+        case AVRO_UNION:
+            throw Exception("Generic datum cannot be a union");
+        default:
+            throw Exception(boost::format("Unknown schema type %1%") %
+                toString(type_));
+    }
+}
+
+/// Builds a record with one default-constructed datum per leaf of the
+/// given schema node.
+GenericRecord::GenericRecord(const NodePtr& schema) :
+    GenericContainer(schema), fields_(schema->leaves())
+{
+}
+
+/// Creates a non-resolving reader: data is decoded for schema s with the
+/// given decoder as is.
+GenericReader::GenericReader(const ValidSchema& s, const DecoderPtr& decoder)
+    : schema_(s),
+      isResolving_(false),
+      decoder_(decoder)
+{
+}
+
+/// Creates a resolving reader: the given decoder is wrapped in a resolving
+/// decoder for (writerSchema, readerSchema) and values are read against
+/// readerSchema.
+GenericReader::GenericReader(const ValidSchema& writerSchema,
+    const ValidSchema& readerSchema, const DecoderPtr& decoder)
+    : schema_(readerSchema),
+      isResolving_(true),
+      decoder_(resolvingDecoder(writerSchema, readerSchema, decoder))
+{
+}
+
+/// Decodes one value for the root node of the schema into datum.
+void GenericReader::read(GenericDatum& datum) const
+{
+    const NodePtr& root = schema_.root();
+    Decoder& d = *decoder_;
+    read(datum, root, d, isResolving_);
+}
+
+/// Returns true if the container of type T held by datum was built from a
+/// schema node other than n.
+template <typename T>
+static bool schemaDiffers(const GenericDatum& datum, const NodePtr& n)
+{
+    return datum.value<T>().schema() != n;
+}
+
+/// Returns true if datum cannot be reused as is for a value of schema node
+/// n: either its type differs or it holds a container built from another
+/// schema node.
+static bool needsReset(const GenericDatum& datum, const NodePtr& n)
+{
+    if (datum.type() != n->type()) {
+        return true;
+    }
+    switch (n->type()) {
+    case AVRO_FIXED:
+        return schemaDiffers<GenericFixed>(datum, n);
+    case AVRO_RECORD:
+        return schemaDiffers<GenericRecord>(datum, n);
+    case AVRO_ENUM:
+        return schemaDiffers<GenericEnum>(datum, n);
+    case AVRO_ARRAY:
+        return schemaDiffers<GenericArray>(datum, n);
+    case AVRO_MAP:
+        return schemaDiffers<GenericMap>(datum, n);
+    default:
+        return false;
+    }
+}
+
+/**
+ * Makes sure datum can receive a value of schema node n, replacing it with
+ * a default value for n when needed.
+ *
+ * Comparing only the type is not enough for the schema-backed types: a
+ * datum reused from an earlier read may hold, for example, a record built
+ * from a different schema with fewer fields, and the reader would then
+ * index past the end of its field vector. Such a datum is rebuilt from n.
+ *
+ * A union node leaves datum untouched. Throws Exception for a node type
+ * that is not listed.
+ */
+static void ensureType(GenericDatum& datum, const NodePtr& n)
+{
+    if (needsReset(datum, n)) {
+        switch (n->type()) {
+        case AVRO_NULL:
+            datum = GenericDatum();
+            break;
+        case AVRO_BOOL:
+            datum = bool();
+            break;
+        case AVRO_INT:
+            datum = int32_t();
+            break;
+        case AVRO_LONG:
+            datum = int64_t();
+            break;
+        case AVRO_FLOAT:
+            datum = float();
+            break;
+        case AVRO_DOUBLE:
+            datum = double();
+            break;
+        case AVRO_STRING:
+            datum = string();
+            break;
+        case AVRO_BYTES:
+            datum = bytes();
+            break;
+        case AVRO_FIXED:
+        case AVRO_RECORD:
+        case AVRO_ENUM:
+        case AVRO_ARRAY:
+        case AVRO_MAP:
+            // Implicit GenericDatum(const NodePtr&) builds the container.
+            datum = n;
+            break;
+        case AVRO_UNION:
+            break;
+        default:
+            throw Exception("Unknown schema type");
+        }
+    }
+}
+
+/**
+ * Decodes one value for schema node n from d into datum, recursing into
+ * record fields, array items and map values.
+ *
+ * isResolving must be true only when d is a ResolvingDecoder; the record
+ * branch then casts d to that type to obtain the field order.
+ *
+ * Throws Exception for a node type that is not handled.
+ */
+void GenericReader::read(GenericDatum& datum, const NodePtr& n, Decoder& d,
+    bool isResolving)
+{
+    NodePtr nn = n;
+    // For a union, the decoded branch index selects the node to read.
+    if (nn->type() == AVRO_UNION) {
+        size_t r = d.decodeUnionIndex();
+        nn = nn->leafAt(r);
+    }
+    // A symbolic node stands for another node; continue with that one.
+    if (nn->type() == AVRO_SYMBOLIC) {
+        nn = static_cast<NodeSymbolic&>(*nn).getNode();
+    }
+    // Give datum the right type before its payload is accessed below.
+    ensureType(datum, nn);
+    switch (nn->type()) {
+    case AVRO_NULL:
+        d.decodeNull();
+        break;
+    case AVRO_BOOL:
+        datum.value<bool>() = d.decodeBool();
+        break;
+    case AVRO_INT:
+        datum.value<int32_t>() = d.decodeInt();
+        break;
+    case AVRO_LONG:
+        datum.value<int64_t>() = d.decodeLong();
+        break;
+    case AVRO_FLOAT:
+        datum.value<float>() = d.decodeFloat();
+        break;
+    case AVRO_DOUBLE:
+        datum.value<double>() = d.decodeDouble();
+        break;
+    case AVRO_STRING:
+        d.decodeString(datum.value<string>());
+        break;
+    case AVRO_BYTES:
+        d.decodeBytes(datum.value<bytes>());
+        break;
+    case AVRO_FIXED:
+        d.decodeFixed(nn->fixedSize(), datum.value<GenericFixed>().value());
+        break;
+    case AVRO_RECORD:
+        {
+            GenericRecord& r = datum.value<GenericRecord>();
+            size_t c = nn->leaves();
+            if (isResolving) {
+                // Read the fields in the order the resolving decoder
+                // reports, storing each at its own position.
+                std::vector<size_t> fo =
+                    static_cast<ResolvingDecoder&>(d).fieldOrder();
+                for (size_t i = 0; i < c; ++i) {
+                    read(r.fieldAt(fo[i]), nn->leafAt(fo[i]), d, isResolving);
+                }
+            } else {
+                for (size_t i = 0; i < c; ++i) {
+                    read(r.fieldAt(i), nn->leafAt(i), d, isResolving);
+                }
+            }
+        }
+        break;
+    case AVRO_ENUM:
+        datum.value<GenericEnum>().set(d.decodeEnum());
+        break;
+    case AVRO_ARRAY:
+        {
+            vector<GenericDatum>& r = datum.value<GenericArray>().value();
+            r.resize(0);
+            // start is the index of the next item to fill; it carries over
+            // from one block of m items to the next.
+            size_t start = 0;
+            for (size_t m = d.arrayStart(); m != 0; m = d.arrayNext()) {
+                r.resize(r.size() + m);
+                for (; start < r.size(); ++start) {
+                    read(r[start], nn->leafAt(0), d, isResolving);
+                }
+            }
+        }
+        break;
+    case AVRO_MAP:
+        {
+            GenericMap::Value& r = datum.value<GenericMap>().value();
+            r.resize(0);
+            // Same block-wise loop as for arrays; each entry is a key
+            // string followed by a value for leaf 1 of the map node.
+            size_t start = 0;
+            for (size_t m = d.mapStart(); m != 0; m = d.mapNext()) {
+                r.resize(r.size() + m);
+                for (; start < r.size(); ++start) {
+                    d.decodeString(r[start].first);
+                    read(r[start].second, nn->leafAt(1), d, isResolving);
+                }
+            }
+        }
+        break;
+    default:
+        throw Exception("Unknown schema type");
+    }
+}
+
+/// Always throws an Exception reporting the two mismatching types.
+static void typeMismatch(Type t, Type u)
+{
+    boost::format message("Type mismatch %1% v %2%");
+    message % toString(t) % toString(u);
+    throw Exception(message);
+}
+
+/// Returns true if the schema of the container of type T held by datum has
+/// the same name as schema node n.
+template <typename T>
+bool hasSameName(const GenericDatum& datum, const NodePtr& n)
+{
+    const NodePtr& held = datum.value<T>().schema();
+    return held->name() == n->name();
+}
+
+/// Reports a type mismatch unless the container of type T held by datum
+/// was built from exactly the schema node n.
+template <typename T>
+void assertSameType(const GenericDatum& datum, const NodePtr& n)
+{
+    const NodePtr& held = datum.value<T>().schema();
+    if (held != n) {
+        typeMismatch(held->type(), n->type());
+    }
+}
+
+/**
+ * Checks that datum can be written for schema node n and reports a type
+ * mismatch otherwise.
+ *
+ * The types must be equal; fixed, record and enum data must in addition
+ * have been built from the node n itself. Node types not listed below are
+ * always reported as a mismatch.
+ */
+static void assertType(const GenericDatum& datum, const NodePtr& n)
+{
+    const Type expected = n->type();
+    if (datum.type() != expected) {
+        typeMismatch(datum.type(), expected);
+    }
+    switch (expected) {
+    case AVRO_FIXED:
+        assertSameType<GenericFixed>(datum, n);
+        return;
+    case AVRO_RECORD:
+        assertSameType<GenericRecord>(datum, n);
+        return;
+    case AVRO_ENUM:
+        assertSameType<GenericEnum>(datum, n);
+        return;
+    case AVRO_NULL:
+    case AVRO_BOOL:
+    case AVRO_INT:
+    case AVRO_LONG:
+    case AVRO_FLOAT:
+    case AVRO_DOUBLE:
+    case AVRO_STRING:
+    case AVRO_BYTES:
+    case AVRO_ARRAY:
+    case AVRO_MAP:
+        return;
+    default:
+        typeMismatch(datum.type(), expected);
+    }
+}
+
+/// Creates a writer that encodes data for schema s with the given encoder.
+GenericWriter::GenericWriter(const ValidSchema& s, const EncoderPtr& encoder)
+    : schema_(s),
+      encoder_(encoder)
+{
+}
+
+/// Encodes datum against the root node of the schema.
+void GenericWriter::write(const GenericDatum& datum) const
+{
+    const NodePtr& root = schema_.root();
+    Encoder& e = *encoder_;
+    write(datum, root, e);
+}
+
+/**
+ * Returns the index of the first branch of union node n that datum can be
+ * written as.
+ *
+ * A branch matches when its type equals the type of datum; for fixed,
+ * record and enum data the schema names must match as well. A symbolic
+ * branch is resolved to the node it refers to before it is compared, since
+ * a datum never has the symbolic type itself.
+ *
+ * Throws Exception, naming the datum type and the union schema, when no
+ * branch matches.
+ */
+static size_t selectBranch(const GenericDatum& datum, const NodePtr& n)
+{
+    size_t c = n->leaves();
+    for (size_t i = 0; i < c; ++i) {
+        NodePtr nn = n->leafAt(i);
+        if (nn->type() == AVRO_SYMBOLIC) {
+            nn = static_cast<NodeSymbolic&>(*nn).getNode();
+        }
+        if (datum.type() == nn->type()) {
+            switch (datum.type()) {
+            case AVRO_FIXED:
+                if (hasSameName<GenericFixed>(datum, nn)) return i;
+                break;
+            case AVRO_RECORD:
+                if (hasSameName<GenericRecord>(datum, nn)) return i;
+                break;
+            case AVRO_ENUM:
+                if (hasSameName<GenericEnum>(datum, nn)) return i;
+                break;
+            default:
+                return i;
+            }
+        }
+    }
+    ostringstream oss;
+    n->printJson(oss, 0);
+    throw Exception(boost::format("No match for %1% in %2%") %
+        toString(datum.type()) % oss.str());
+}
+
+/**
+ * Encodes datum for schema node n using e, recursing into record fields,
+ * array items and map values.
+ *
+ * Throws Exception when datum does not match the node (see assertType and
+ * selectBranch) or when the node type is not handled.
+ */
+void GenericWriter::write(const GenericDatum& datum,
+    const NodePtr& n, Encoder& e)
+{
+    NodePtr nn = n;
+    // For a union, pick the branch that fits datum and emit its index.
+    if (nn->type() == AVRO_UNION) {
+        size_t br = selectBranch(datum, nn);
+        e.encodeUnionIndex(br);
+        nn = nn->leafAt(br);
+    }
+    // A symbolic node stands for another node; continue with that one.
+    if (nn->type() == AVRO_SYMBOLIC) {
+        nn = static_cast<NodeSymbolic&>(*nn).getNode();
+    }
+    // Check the payload type before it is accessed below.
+    assertType(datum, nn);
+    switch (nn->type()) {
+    case AVRO_NULL:
+        e.encodeNull();
+        break;
+    case AVRO_BOOL:
+        e.encodeBool(datum.value<bool>());
+        break;
+    case AVRO_INT:
+        e.encodeInt(datum.value<int32_t>());
+        break;
+    case AVRO_LONG:
+        e.encodeLong(datum.value<int64_t>());
+        break;
+    case AVRO_FLOAT:
+        e.encodeFloat(datum.value<float>());
+        break;
+    case AVRO_DOUBLE:
+        e.encodeDouble(datum.value<double>());
+        break;
+    case AVRO_STRING:
+        e.encodeString(datum.value<string>());
+        break;
+    case AVRO_BYTES:
+        e.encodeBytes(datum.value<bytes>());
+        break;
+    case AVRO_FIXED:
+        e.encodeFixed(datum.value<GenericFixed>().value());
+        break;
+    case AVRO_RECORD:
+        {
+            // Fields are written in schema order.
+            const GenericRecord& r = datum.value<GenericRecord>();
+            size_t c = nn->leaves();
+            for (size_t i = 0; i < c; ++i) {
+                write(r.fieldAt(i), nn->leafAt(i), e);
+            }
+        }
+        break;
+    case AVRO_ENUM:
+        e.encodeEnum(datum.value<GenericEnum>().value());
+        break;
+    case AVRO_ARRAY:
+        {
+            // All items are announced with a single setItemCount() call;
+            // an empty array is just arrayStart() followed by arrayEnd().
+            const GenericArray::Value& r = datum.value<GenericArray>().value();
+            e.arrayStart();
+            if (! r.empty()) {
+                e.setItemCount(r.size());
+                for (GenericArray::Value::const_iterator it = r.begin();
+                    it != r.end(); ++it) {
+                    e.startItem();
+                    write(*it, nn->leafAt(0), e);
+                }
+            }
+            e.arrayEnd();
+        }
+        break;
+    case AVRO_MAP:
+        {
+            // Same shape as the array case; each item is a key string
+            // followed by a value for leaf 1 of the map node.
+            const GenericMap::Value& r = datum.value<GenericMap>().value();
+            e.mapStart();
+            if (! r.empty()) {
+                e.setItemCount(r.size());
+                for (GenericMap::Value::const_iterator it = r.begin();
+                    it != r.end(); ++it) {
+                    e.startItem();
+                    e.encodeString(it->first);
+                    write(it->second, nn->leafAt(1), e);
+                }
+            }
+            e.mapEnd();
+        }
+        break;
+    default:
+        throw Exception("Unknown schema type");
+    }
+}
+
+}   // namespace avro

Modified: avro/trunk/lang/c++/parser/AvroLex.ll
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/parser/AvroLex.ll?rev=1083246&r1=1083245&r2=1083246&view=diff
==============================================================================
--- avro/trunk/lang/c++/parser/AvroLex.ll (original)
+++ avro/trunk/lang/c++/parser/AvroLex.ll Sat Mar 19 18:14:03 2011
@@ -109,7 +109,7 @@ anytext .*
 <READFIELD>\"name\"{delim}\"    yy_push_state(READFIELDNAME); 
 <READFIELD>\}                   yy_pop_state(); return AVRO_LEX_FIELD_END;
 <READFIELD>,                    return yytext[0];
-<READFIELD>{avrotext}+{delim}      yy_push_state(READMETADATA); return AVRO_LEX_METADATA;
+<READFIELD>\"{avrotext}+\"{delim}      yy_push_state(READMETADATA); return AVRO_LEX_METADATA;
 <READFIELD>{ws}                 ;
 
 <READFIELDS>\{                  yy_push_state(READFIELD); return AVRO_LEX_FIELD;
@@ -158,7 +158,7 @@ anytext .*
 <INOBJECT>\"symbols\"{delim}\[  yy_push_state(READSYMBOLS); return AVRO_LEX_SYMBOLS;
 <INOBJECT>,                     return yytext[0];
 <INOBJECT>\}                    yy_pop_state(); return yytext[0];
-<INOBJECT>{avrotext}+{delim}       yy_push_state(READMETADATA); return AVRO_LEX_METADATA;
+<INOBJECT>\"{avrotext}+\"{delim}       yy_push_state(READMETADATA); return AVRO_LEX_METADATA;
 <INOBJECT>{ws}                  ;
 
 <STARTTYPE>\"                   yy_pop_state(); yy_push_state(READTYPE); 

Modified: avro/trunk/lang/c++/test/CodecTests.cc
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/test/CodecTests.cc?rev=1083246&r1=1083245&r2=1083246&view=diff
==============================================================================
--- avro/trunk/lang/c++/test/CodecTests.cc (original)
+++ avro/trunk/lang/c++/test/CodecTests.cc Sat Mar 19 18:14:03 2011
@@ -22,6 +22,7 @@
 #include "Decoder.hh"
 #include "Compiler.hh"
 #include "ValidSchema.hh"
+#include "Generic.hh"
 
 #include <stdint.h>
 #include <vector>
@@ -53,6 +54,14 @@ static const unsigned int count = 10;
  * To test Json encoder and decoder, we use the same technqiue with only
  * one difference - we use JsonEncoder and JsonDecoder.
  *
+ * We also use the same infrastructure to test GenericReader and GenericWriter.
+ * In this case, avro binary is generated in the standard way. It is read
+ * into a GenericDatum, which in turn is written out. This newly serialized
+ * data is decoded in the standard way to check that it is what was written.
+ * The last step won't work if the schema for reading is different from
+ * that for writing. This is because any reordering of fields would have
+ * been fixed by the GenericDatum's decoding and encoding step.
+ *
  * For most tests, the data is generated at random.
  */
 
@@ -611,7 +620,7 @@ void testCodec(const TestData& td) {
                 << " schema: " << td.schema
                 << " calls: " << td.calls
                 << " skip-level: " << skipLevel << std::endl;
-            */
+                */
             BOOST_TEST_CHECKPOINT("Test: " << testNo << ' '
                 << " schema: " << td.schema
                 << " calls: " << td.calls
@@ -736,6 +745,128 @@ void testWriterFail(const TestData2& td)
         td.incorrectCalls, v, p), Exception);
 }
 
+/**
+ * Round-trips encoded test data through the generic layer: the data is
+ * read into a GenericDatum with GenericReader, written back out with
+ * GenericWriter, and the re-encoded stream is verified with testDecoder
+ * against the values collected by testEncoder. Repeated count times.
+ */
+template<typename CodecFactory>
+void testGeneric(const TestData& td) {
+    static int testNo = 0;
+    testNo++;
+
+    // Set the checkpoint up front, as the resolving variants do, so that a
+    // failure while building the schema or in the generic read/write below
+    // is reported against this test rather than an earlier one.
+    BOOST_TEST_CHECKPOINT("Test: " << testNo << ' '
+        << " schema: " << td.schema
+        << " calls: " << td.calls);
+
+    ValidSchema vs = makeValidSchema(td.schema);
+
+    for (unsigned int i = 0; i < count; ++i) {
+        vector<string> v;
+        auto_ptr<OutputStream> p;
+        testEncoder(CodecFactory::newEncoder(vs), td.calls, v, p);
+        // dump(*p);
+        DecoderPtr d1 = CodecFactory::newDecoder(vs);
+        auto_ptr<InputStream> in1 = memoryInputStream(*p);
+        d1->init(*in1);
+        GenericReader gr(vs, d1);
+        GenericDatum datum;
+        gr.read(datum);
+
+        EncoderPtr e2 = CodecFactory::newEncoder(vs);
+        auto_ptr<OutputStream> ob = memoryOutputStream();
+        e2->init(*ob);
+
+        GenericWriter gw(vs, e2);
+        gw.write(datum);
+        e2->flush();
+
+        BOOST_TEST_CHECKPOINT("Test: " << testNo << ' '
+            << " schema: " << td.schema
+            << " calls: " << td.calls);
+        auto_ptr<InputStream> in2 = memoryInputStream(*ob);
+        testDecoder(CodecFactory::newDecoder(vs), v, *in2,
+            td.calls, td.depth);
+    }
+}
+
+/**
+ * Round-trips encoded test data through the generic layer with schema
+ * resolution: data encoded for the writer schema is read into a
+ * GenericDatum by a resolving GenericReader, written back out for the
+ * reader schema, and the result is verified with testDecoder using the
+ * reader calls. Repeated count times.
+ */
+template<typename CodecFactory>
+void testGenericResolving(const TestData3& td) {
+    static int testNo = 0;
+    testNo++;
+
+    BOOST_TEST_CHECKPOINT("Test: " << testNo << ' '
+        << " writer schema: " << td.writerSchema
+        << " writer calls: " << td.writerCalls
+        << " reader schema: " << td.readerSchema
+        << " reader calls: " << td.readerCalls);
+
+    ValidSchema wvs = makeValidSchema(td.writerSchema);
+    ValidSchema rvs = makeValidSchema(td.readerSchema);
+
+    for (unsigned int i = 0; i < count; ++i) {
+        vector<string> v;
+        auto_ptr<OutputStream> p;
+        testEncoder(CodecFactory::newEncoder(wvs), td.writerCalls, v, p);
+        // dump(*p);
+        DecoderPtr d1 = CodecFactory::newDecoder(wvs);
+        auto_ptr<InputStream> in1 = memoryInputStream(*p);
+        d1->init(*in1);
+
+        GenericReader gr(wvs, rvs, d1);
+        GenericDatum datum;
+        gr.read(datum);
+
+        EncoderPtr e2 = CodecFactory::newEncoder(rvs);
+        auto_ptr<OutputStream> ob = memoryOutputStream();
+        e2->init(*ob);
+
+        GenericWriter gw(rvs, e2);
+        gw.write(datum);
+        e2->flush();
+
+        // Label fixed: it previously read " writer-schemai ".
+        BOOST_TEST_CHECKPOINT("Test: " << testNo << ' '
+            << " writer-schema: " << td.writerSchema
+            << " writer-calls: " << td.writerCalls 
+            << " reader-schema: " << td.readerSchema
+            << " calls: " << td.readerCalls);
+        auto_ptr<InputStream> in2 = memoryInputStream(*ob);
+        testDecoder(CodecFactory::newDecoder(rvs), v, *in2,
+            td.readerCalls, td.depth);
+    }
+}
+
+/**
+ * Resolving round trip with fixed writer values: the values from the test
+ * data are encoded for the writer schema, read into a GenericDatum by a
+ * resolving GenericReader and written back out for the reader schema.
+ * The output is not decoded again; the test only checks that the read and
+ * the write complete.
+ */
+template<typename CodecFactory>
+void testGenericResolving2(const TestData4& td) {
+    static int testNo = 0;
+    testNo++;
+
+    BOOST_TEST_CHECKPOINT("Test: " << testNo << ' '
+        << " writer schema: " << td.writerSchema
+        << " writer calls: " << td.writerCalls
+        << " reader schema: " << td.readerSchema
+        << " reader calls: " << td.readerCalls);
+
+    ValidSchema writerVs = makeValidSchema(td.writerSchema);
+    ValidSchema readerVs = makeValidSchema(td.readerSchema);
+
+    const vector<string> writerData = mkValues(td.writerValues);
+
+    // Encode the given values for the writer schema.
+    auto_ptr<OutputStream> encoded =
+        generate(*CodecFactory::newEncoder(writerVs), td.writerCalls,
+            writerData);
+    // dump(*encoded);
+    DecoderPtr decoder = CodecFactory::newDecoder(writerVs);
+    auto_ptr<InputStream> encodedIn = memoryInputStream(*encoded);
+    decoder->init(*encodedIn);
+
+    // Read with resolution from the writer schema to the reader schema.
+    GenericReader reader(writerVs, readerVs, decoder);
+    GenericDatum datum;
+    reader.read(datum);
+
+    // Write the datum back out for the reader schema.
+    EncoderPtr encoder = CodecFactory::newEncoder(readerVs);
+    auto_ptr<OutputStream> reencoded = memoryOutputStream();
+    encoder->init(*reencoded);
+
+    GenericWriter writer(readerVs, encoder);
+    writer.write(datum);
+    encoder->flush();
+    // We cannot verify with the reader calls because they are for
+    // the resolving decoder and hence could be in a different order than
+    // the "normal" data.
+}
+
+
 static const TestData data[] = {
     { "\"null\"", "N", 1 },
     { "\"boolean\"", "B", 1 },
@@ -1262,6 +1393,10 @@ void add_tests(boost::unit_test::test_su
         testCodecResolving2, data4);
     ADD_TESTS(ts, ValidatingEncoderResolvingDecoderFactory,
         testCodecResolving2, data4);
+
+    ADD_TESTS(ts, ValidatingCodecFactory, testGeneric, data);
+    ADD_TESTS(ts, ValidatingCodecFactory, testGenericResolving, data3);
+    ADD_TESTS(ts, ValidatingCodecFactory, testGenericResolving2, data4);
 }
 
 }   // namespace parsing