You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by th...@apache.org on 2011/03/19 19:14:03 UTC
svn commit: r1083246 - in /avro/trunk: CHANGES.txt lang/c++/CMakeLists.txt
lang/c++/api/Generic.hh lang/c++/impl/Generic.cc lang/c++/parser/AvroLex.ll
lang/c++/test/CodecTests.cc
Author: thiru
Date: Sat Mar 19 18:14:03 2011
New Revision: 1083246
URL: http://svn.apache.org/viewvc?rev=1083246&view=rev
Log:
AVRO-781. Generic data support in C++
Added:
avro/trunk/lang/c++/api/Generic.hh
avro/trunk/lang/c++/impl/Generic.cc
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/c++/CMakeLists.txt
avro/trunk/lang/c++/parser/AvroLex.ll
avro/trunk/lang/c++/test/CodecTests.cc
Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1083246&r1=1083245&r2=1083246&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Sat Mar 19 18:14:03 2011
@@ -237,6 +237,8 @@ Avro 1.5.0 (10 March 2011)
AVRO-754. Java: Permit passing custom channel factory to NettyTransceiver.
(Bruno Dumon via cutting)
+ AVRO-781. Generic data support in C++. (thiru)
+
BUG FIXES
AVRO-764. Java: Bug in BinaryData.compare() with offset comparison.
Modified: avro/trunk/lang/c++/CMakeLists.txt
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/CMakeLists.txt?rev=1083246&r1=1083245&r2=1083246&view=diff
==============================================================================
--- avro/trunk/lang/c++/CMakeLists.txt (original)
+++ avro/trunk/lang/c++/CMakeLists.txt Sat Mar 19 18:14:03 2011
@@ -37,12 +37,14 @@ project (Avro-cpp)
find_package (Boost 1.38 COMPONENTS regex filesystem system)
include_directories (api ${BUILD_DIRECTORY})
-add_library (avrocpp SHARED impl/Compiler.cc
+
+add_library (avrocpp SHARED
impl/Compiler.cc impl/CompilerNode.cc impl/Node.cc
impl/NodeImpl.cc impl/Resolver.cc impl/ResolverSchema.cc impl/Schema.cc
impl/Types.cc impl/Validator.cc impl/ValidSchema.cc impl/Zigzag.cc
impl/BinaryEncoder.cc impl/BinaryDecoder.cc
impl/Stream.cc impl/FileStream.cc
+ impl/Generic.cc
impl/parsing/Symbol.cc
impl/parsing/ValidatingCodec.cc
impl/parsing/JsonCodec.cc
@@ -86,7 +88,7 @@ add_custom_command (OUTPUT ${BUILD_DIREC
macro (test name)
add_executable (${name} test/${name}.cc)
- target_link_libraries (${name} avrocpp boost_regex-mt)
+ target_link_libraries (${name} avrocpp ${Boost_LIBRARIES})
endmacro (test)
test(buffertest)
@@ -100,7 +102,7 @@ target_link_libraries (StreamTests avroc
add_executable (testgentest test/testgen.cc)
add_dependencies (testgentest testgen testgen2)
-target_link_libraries (testgentest avrocpp boost_regex-mt)
+target_link_libraries (testgentest avrocpp ${Boost_LIBRARIES})
include (InstallRequiredSystemLibraries)
Added: avro/trunk/lang/c++/api/Generic.hh
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/api/Generic.hh?rev=1083246&view=auto
==============================================================================
--- avro/trunk/lang/c++/api/Generic.hh (added)
+++ avro/trunk/lang/c++/api/Generic.hh Sat Mar 19 18:14:03 2011
@@ -0,0 +1,255 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef avro_Generic_hh__
+#define avro_Generic_hh__
+
+#include <vector>
+#include <map>
+#include <string>
+
+#include <boost/utility.hpp>
+#include <boost/any.hpp>
+
+#include "Node.hh"
+#include "Types.hh"
+#include "Encoder.hh"
+#include "Decoder.hh"
+#include "ValidSchema.hh"
+
+namespace avro {
+
+/**
+ * A single Avro value of any type.
+ *
+ * The Avro type tag is kept in type_ and the value itself in a boost::any.
+ * Primitive values are stored as the C++ types named on the constructors
+ * below. A datum built from a schema stores a GenericFixed, GenericRecord,
+ * GenericEnum, GenericArray or GenericMap for the corresponding Avro types.
+ */
+class GenericDatum {
+    Type type_;
+    boost::any value_;
+
+    GenericDatum(Type t) : type_(t) { }
+
+    template <typename T>
+    GenericDatum(Type t, const T& v) : type_(t), value_(v) { }
+
+public:
+    /// Returns the Avro type tag of this datum.
+    Type type() const {
+        return type_;
+    }
+
+    /**
+     * Returns the stored value as a const T.
+     * T must be exactly the C++ type held by this datum; otherwise
+     * boost::bad_any_cast is thrown (previously a null pointer was
+     * dereferenced in that case).
+     */
+    template<typename T>
+    const T& value() const {
+        return boost::any_cast<const T&>(value_);
+    }
+
+    /**
+     * Returns the stored value as a modifiable T.
+     * T must be exactly the C++ type held by this datum; otherwise
+     * boost::bad_any_cast is thrown.
+     */
+    template<typename T>
+    T& value() {
+        return boost::any_cast<T&>(value_);
+    }
+
+    /// Makes a new AVRO_NULL datum.
+    GenericDatum() : type_(AVRO_NULL) { }
+
+    /// Makes a new AVRO_BOOL datum whose value is of type bool.
+    GenericDatum(bool v) : type_(AVRO_BOOL), value_(v) { }
+
+    /// Makes a new AVRO_INT datum whose value is of type int32_t.
+    GenericDatum(int32_t v) : type_(AVRO_INT), value_(v) { }
+
+    /// Makes a new AVRO_LONG datum whose value is of type int64_t.
+    GenericDatum(int64_t v) : type_(AVRO_LONG), value_(v) { }
+
+    /// Makes a new AVRO_FLOAT datum whose value is of type float.
+    GenericDatum(float v) : type_(AVRO_FLOAT), value_(v) { }
+
+    /// Makes a new AVRO_DOUBLE datum whose value is of type double.
+    GenericDatum(double v) : type_(AVRO_DOUBLE), value_(v) { }
+
+    /// Makes a new AVRO_STRING datum whose value is of type std::string.
+    GenericDatum(const std::string& v) : type_(AVRO_STRING), value_(v) { }
+
+    /// Makes a new AVRO_BYTES datum whose value is of type
+    /// std::vector<uint8_t>.
+    GenericDatum(const std::vector<uint8_t>& v) :
+        type_(AVRO_BYTES), value_(v) { }
+
+    /// Makes a new datum whose type and initial value are determined by
+    /// the given schema node.
+    GenericDatum(const NodePtr& schema);
+};
+
+/**
+ * Common base of the generic value classes that carry their own schema
+ * node (record, array, map, enum and fixed).
+ */
+class GenericContainer {
+public:
+    /// Returns the schema for this object
+    const NodePtr& schema() const {
+        return schema_;
+    }
+
+protected:
+    /// Remembers the schema node; only derived classes may construct.
+    GenericContainer(const NodePtr& s) : schema_(s) { }
+
+    /// Checks datum v against schema node n (used by
+    /// GenericRecord::setFieldAt).
+    static void assertSameType(const GenericDatum& v, const NodePtr& n);
+
+    /// Throws Exception(message) unless schema->type() equals type.
+    static void assertType(const NodePtr& schema, Type type,
+        const char* message);
+
+private:
+    const NodePtr schema_;
+};
+
+/**
+ * Generic container for an Avro record: one GenericDatum per field,
+ * addressed by field position.
+ */
+class GenericRecord : public GenericContainer {
+public:
+    /// Makes a record with one datum per leaf of the given schema node.
+    GenericRecord(const NodePtr& schema);
+
+    /// Returns the number of fields held.
+    size_t fieldCount() const {
+        return fields_.size();
+    }
+
+    /// Returns the field at position pos; pos is not range-checked.
+    const GenericDatum& fieldAt(size_t pos) const {
+        return fields_[pos];
+    }
+
+    /// Returns the modifiable field at position pos; pos is not
+    /// range-checked.
+    GenericDatum& fieldAt(size_t pos) {
+        return fields_[pos];
+    }
+
+    /// Replaces the field at position pos with v after checking v against
+    /// the schema of that field.
+    void setFieldAt(size_t pos, const GenericDatum& v) {
+        const NodePtr& fieldSchema = schema()->leafAt(pos);
+        assertSameType(v, fieldSchema);
+        fields_[pos] = v;
+    }
+
+private:
+    std::vector<GenericDatum> fields_;
+};
+
+/**
+ * Generic container for an Avro array; the items are kept in a
+ * std::vector of GenericDatum.
+ */
+class GenericArray : public GenericContainer {
+public:
+    typedef std::vector<GenericDatum> Value;
+
+    /// Makes an empty array; throws Exception if schema is not an
+    /// AVRO_ARRAY node.
+    GenericArray(const NodePtr& schema) : GenericContainer(schema) {
+        assertType(schema, AVRO_ARRAY, "Schema is not an array");
+    }
+
+    /// Returns the items of the array.
+    const Value& value() const {
+        return value_;
+    }
+
+    /// Returns the modifiable items of the array.
+    Value& value() {
+        return value_;
+    }
+private:
+    Value value_;
+};
+
+/**
+ * Generic container for an Avro map; the entries are kept as a vector of
+ * (key, value) pairs in the order they were added.
+ */
+class GenericMap : public GenericContainer {
+public:
+    typedef std::vector<std::pair<std::string, GenericDatum> > Value;
+
+private:
+    Value value_;
+
+public:
+    /// Makes an empty map; throws Exception if schema is not an AVRO_MAP
+    /// node.
+    GenericMap(const NodePtr& schema) : GenericContainer(schema) {
+        assertType(schema, AVRO_MAP, "Schema is not a map");
+    }
+
+    /// Returns the entries of the map.
+    const Value& value() const {
+        return value_;
+    }
+
+    /// Returns the modifiable entries of the map.
+    Value& value() {
+        return value_;
+    }
+};
+
+/**
+ * Generic container for an Avro enum value, held as an index into the
+ * symbol names of the enum's schema node.
+ */
+class GenericEnum : public GenericContainer {
+    size_t value_;
+public:
+    /// Makes an enum for the given schema, initially set to index 0.
+    GenericEnum(const NodePtr& schema) : GenericContainer(schema), value_(0) {
+    }
+
+    /// Returns the symbol at index n. Throws Exception if the schema has
+    /// no such index. Const because it only reads the schema.
+    const std::string& symbol(size_t n) const {
+        if (n < schema()->names()) {
+            return schema()->nameAt(n);
+        }
+        throw Exception("Not as many symbols");
+    }
+
+    /// Returns the index of the given symbol. Throws Exception if the
+    /// schema has no such symbol.
+    size_t index(const std::string& symbol) const {
+        size_t result;
+        if (schema()->nameIndex(symbol, result)) {
+            return result;
+        }
+        throw Exception("No such symbol");
+    }
+
+    /// Sets the value to the given symbol and returns its index. Throws
+    /// Exception if the schema has no such symbol.
+    size_t set(const std::string& symbol) {
+        return value_ = index(symbol);
+    }
+
+    /// Sets the value to index n. Throws Exception if the schema has no
+    /// such index.
+    void set(size_t n) {
+        if (n < schema()->names()) {
+            value_ = n;
+            return;
+        }
+        throw Exception("Not as many symbols");
+    }
+
+    /// Returns the current value as an index.
+    size_t value() const {
+        return value_;
+    }
+
+    /// Returns the current value as a symbol.
+    const std::string& symbol() const {
+        return schema()->nameAt(value_);
+    }
+};
+
+/**
+ * Generic container for an Avro fixed value: a byte vector sized from the
+ * schema's fixedSize().
+ */
+class GenericFixed : public GenericContainer {
+public:
+    /// Makes a fixed value of schema->fixedSize() zero bytes.
+    GenericFixed(const NodePtr& schema) :
+        GenericContainer(schema), value_(schema->fixedSize()) {
+    }
+
+    /// Returns the bytes of the value.
+    const std::vector<uint8_t>& value() const {
+        return value_;
+    }
+
+    /// Returns the modifiable bytes of the value.
+    std::vector<uint8_t>& value() {
+        return value_;
+    }
+
+private:
+    std::vector<uint8_t> value_;
+};
+
+
+/**
+ * Reads Avro data from a decoder into GenericDatum objects.
+ */
+class GenericReader : boost::noncopyable {
+    const ValidSchema schema_;
+    const bool isResolving_;
+    const DecoderPtr decoder_;
+
+    /// Reads one value described by schema node n from d into datum.
+    static void read(GenericDatum& datum, const NodePtr& n, Decoder& d,
+        bool isResolving);
+
+public:
+    /// Makes a reader for data that is written and read with schema s.
+    GenericReader(const ValidSchema& s, const DecoderPtr& decoder);
+
+    /// Makes a reader that wraps decoder in a resolving decoder for the
+    /// given writer and reader schemas; values follow the reader schema.
+    GenericReader(const ValidSchema& writerSchema,
+        const ValidSchema& readerSchema, const DecoderPtr& decoder);
+
+    /// Reads the next value for the root of the schema into datum.
+    void read(GenericDatum& datum) const;
+};
+
+
+/**
+ * Writes GenericDatum objects to an encoder according to a schema.
+ */
+class GenericWriter : boost::noncopyable {
+    const ValidSchema schema_;
+    const EncoderPtr encoder_;
+
+    /// Writes datum, described by schema node n, to e.
+    static void write(const GenericDatum& datum, const NodePtr& n, Encoder& e);
+
+public:
+    /// Makes a writer that encodes values of schema s with encoder.
+    GenericWriter(const ValidSchema& s, const EncoderPtr& encoder);
+
+    /// Writes datum as a value for the root of the schema.
+    void write(const GenericDatum& datum) const;
+};
+} // namespace avro
+#endif
+
Added: avro/trunk/lang/c++/impl/Generic.cc
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/impl/Generic.cc?rev=1083246&view=auto
==============================================================================
--- avro/trunk/lang/c++/impl/Generic.cc (added)
+++ avro/trunk/lang/c++/impl/Generic.cc Sat Mar 19 18:14:03 2011
@@ -0,0 +1,425 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Generic.hh"
+#include "NodeImpl.hh"
+#include <sstream>
+
+namespace avro {
+
+using std::string;
+using std::vector;
+using std::ostringstream;
+
+typedef vector<uint8_t> bytes;
+
+// Throws Exception(message) unless the schema node has the wanted type.
+void GenericContainer::assertType(const NodePtr& schema, Type type,
+    const char* message)
+{
+    if (schema->type() == type) {
+        return;
+    }
+    throw Exception(message);
+}
+
+// Makes a datum holding a default-constructed value for the given schema.
+// A symbolic node is first followed to the node it refers to, so that both
+// the type tag and any container built below come from the real
+// definition. Throws Exception for a union or an unknown type.
+GenericDatum::GenericDatum(const NodePtr& schema) : type_(schema->type())
+{
+    NodePtr sc = schema;
+    if (type_ == AVRO_SYMBOLIC) {
+        // Resolve the reference the same way the reader and writer do.
+        sc = static_cast<NodeSymbolic&>(*schema).getNode();
+        type_ = sc->type();
+    }
+    switch (type_) {
+    case AVRO_NULL:
+        break;
+    case AVRO_BOOL:
+        value_ = bool();
+        break;
+    case AVRO_INT:
+        value_ = int32_t();
+        break;
+    case AVRO_LONG:
+        value_ = int64_t();
+        break;
+    case AVRO_FLOAT:
+        value_ = float();
+        break;
+    case AVRO_DOUBLE:
+        value_ = double();
+        break;
+    case AVRO_STRING:
+        value_ = string();
+        break;
+    case AVRO_BYTES:
+        value_ = vector<uint8_t>();
+        break;
+    case AVRO_FIXED:
+        value_ = GenericFixed(sc);
+        break;
+    case AVRO_RECORD:
+        value_ = GenericRecord(sc);
+        break;
+    case AVRO_ENUM:
+        value_ = GenericEnum(sc);
+        break;
+    case AVRO_ARRAY:
+        value_ = GenericArray(sc);
+        break;
+    case AVRO_MAP:
+        value_ = GenericMap(sc);
+        break;
+    case AVRO_UNION:
+        throw Exception("Generic datum cannot be a union");
+    default:
+        throw Exception(boost::format("Unknown schema type %1%") %
+            toString(type_));
+    }
+}
+
+// Makes a record with one default (AVRO_NULL) datum per leaf of schema.
+GenericRecord::GenericRecord(const NodePtr& schema) :
+    GenericContainer(schema), fields_(schema->leaves())
+{
+}
+
+// Non-resolving reader: the data is decoded with the schema it was
+// written with, using the decoder as given.
+GenericReader::GenericReader(const ValidSchema& s, const DecoderPtr& decoder)
+    : schema_(s),
+      isResolving_(false),
+      decoder_(decoder)
+{
+}
+
+// Resolving reader: the given decoder is wrapped in a resolving decoder
+// for the two schemas, and values are produced for the reader schema.
+GenericReader::GenericReader(const ValidSchema& writerSchema,
+    const ValidSchema& readerSchema, const DecoderPtr& decoder)
+    : schema_(readerSchema),
+      isResolving_(true),
+      decoder_(resolvingDecoder(writerSchema, readerSchema, decoder))
+{
+}
+
+// Reads the next value for the root of the schema into datum.
+void GenericReader::read(GenericDatum& datum) const
+{
+    const NodePtr& root = schema_.root();
+    read(datum, root, *decoder_, isResolving_);
+}
+
+// True when the container of type T held by datum was built from exactly
+// the schema node n.
+template <typename T>
+static bool holdsSchema(const GenericDatum& datum, const NodePtr& n)
+{
+    return datum.value<T>().schema() == n;
+}
+
+// Makes sure datum can receive a value described by schema node n,
+// replacing it with a fresh default datum when it cannot. A datum is kept
+// only if its type matches and, for the types that carry a schema, it was
+// built from this very node. Checking the type tag alone would reuse, for
+// example, a record left over from a different branch of a union, whose
+// field count need not match. Unions are left untouched; the caller picks
+// the branch before calling. Throws Exception for an unknown type.
+static void ensureType(GenericDatum& datum, const NodePtr& n)
+{
+    if (datum.type() == n->type()) {
+        bool reusable = true;
+        switch (n->type()) {
+        case AVRO_FIXED:
+            reusable = holdsSchema<GenericFixed>(datum, n);
+            break;
+        case AVRO_RECORD:
+            reusable = holdsSchema<GenericRecord>(datum, n);
+            break;
+        case AVRO_ENUM:
+            reusable = holdsSchema<GenericEnum>(datum, n);
+            break;
+        case AVRO_ARRAY:
+            reusable = holdsSchema<GenericArray>(datum, n);
+            break;
+        case AVRO_MAP:
+            reusable = holdsSchema<GenericMap>(datum, n);
+            break;
+        default:
+            break;
+        }
+        if (reusable) {
+            return;
+        }
+    }
+    switch (n->type()) {
+    case AVRO_NULL:
+        datum = GenericDatum();
+        break;
+    case AVRO_BOOL:
+        datum = bool();
+        break;
+    case AVRO_INT:
+        datum = int32_t();
+        break;
+    case AVRO_LONG:
+        datum = int64_t();
+        break;
+    case AVRO_FLOAT:
+        datum = float();
+        break;
+    case AVRO_DOUBLE:
+        datum = double();
+        break;
+    case AVRO_STRING:
+        datum = string();
+        break;
+    case AVRO_BYTES:
+        datum = bytes();
+        break;
+    case AVRO_FIXED:
+    case AVRO_RECORD:
+    case AVRO_ENUM:
+    case AVRO_ARRAY:
+    case AVRO_MAP:
+        // Builds the matching container from the schema node.
+        datum = GenericDatum(n);
+        break;
+    case AVRO_UNION:
+        break;
+    default:
+        throw Exception("Unknown schema type");
+    }
+}
+
+// Decodes one value described by schema node n from d into datum,
+// recursing into records, arrays and maps. For a union the branch index
+// is decoded first; a symbolic node is followed to the node it names.
+// When isResolving is set, d is treated as a ResolvingDecoder and record
+// fields are read in the order it reports. Throws Exception for an
+// unknown type.
+void GenericReader::read(GenericDatum& datum, const NodePtr& n, Decoder& d,
+    bool isResolving)
+{
+    NodePtr node = n;
+    if (node->type() == AVRO_UNION) {
+        node = node->leafAt(d.decodeUnionIndex());
+    }
+    if (node->type() == AVRO_SYMBOLIC) {
+        node = static_cast<NodeSymbolic&>(*node).getNode();
+    }
+    ensureType(datum, node);
+
+    switch (node->type()) {
+    case AVRO_NULL:
+        d.decodeNull();
+        break;
+    case AVRO_BOOL:
+        datum.value<bool>() = d.decodeBool();
+        break;
+    case AVRO_INT:
+        datum.value<int32_t>() = d.decodeInt();
+        break;
+    case AVRO_LONG:
+        datum.value<int64_t>() = d.decodeLong();
+        break;
+    case AVRO_FLOAT:
+        datum.value<float>() = d.decodeFloat();
+        break;
+    case AVRO_DOUBLE:
+        datum.value<double>() = d.decodeDouble();
+        break;
+    case AVRO_STRING:
+        d.decodeString(datum.value<string>());
+        break;
+    case AVRO_BYTES:
+        d.decodeBytes(datum.value<bytes>());
+        break;
+    case AVRO_FIXED:
+        d.decodeFixed(node->fixedSize(), datum.value<GenericFixed>().value());
+        break;
+    case AVRO_RECORD:
+        {
+            GenericRecord& record = datum.value<GenericRecord>();
+            const size_t fieldCount = node->leaves();
+            if (! isResolving) {
+                for (size_t pos = 0; pos < fieldCount; ++pos) {
+                    read(record.fieldAt(pos), node->leafAt(pos), d,
+                        isResolving);
+                }
+            } else {
+                // The order is copied, not referenced.
+                // NOTE(review): presumably nested reads may change the
+                // decoder's own list - confirm before removing the copy.
+                const std::vector<size_t> order =
+                    static_cast<ResolvingDecoder&>(d).fieldOrder();
+                for (size_t i = 0; i < fieldCount; ++i) {
+                    const size_t pos = order[i];
+                    read(record.fieldAt(pos), node->leafAt(pos), d,
+                        isResolving);
+                }
+            }
+        }
+        break;
+    case AVRO_ENUM:
+        datum.value<GenericEnum>().set(d.decodeEnum());
+        break;
+    case AVRO_ARRAY:
+        {
+            vector<GenericDatum>& items = datum.value<GenericArray>().value();
+            items.clear();
+            size_t next = 0;
+            // Items arrive in blocks; grow by each block's size and fill
+            // the newly added slots.
+            size_t blockSize = d.arrayStart();
+            while (blockSize != 0) {
+                items.resize(items.size() + blockSize);
+                const size_t end = items.size();
+                while (next < end) {
+                    read(items[next], node->leafAt(0), d, isResolving);
+                    ++next;
+                }
+                blockSize = d.arrayNext();
+            }
+        }
+        break;
+    case AVRO_MAP:
+        {
+            GenericMap::Value& entries = datum.value<GenericMap>().value();
+            entries.clear();
+            size_t next = 0;
+            // Same block-wise scheme as for arrays; each entry is a key
+            // followed by a value of the schema's second leaf.
+            size_t blockSize = d.mapStart();
+            while (blockSize != 0) {
+                entries.resize(entries.size() + blockSize);
+                const size_t end = entries.size();
+                while (next < end) {
+                    d.decodeString(entries[next].first);
+                    read(entries[next].second, node->leafAt(1), d,
+                        isResolving);
+                    ++next;
+                }
+                blockSize = d.mapNext();
+            }
+        }
+        break;
+    default:
+        throw Exception("Unknown schema type");
+    }
+}
+
+// Always throws an Exception naming the two types that do not agree.
+static void typeMismatch(Type t, Type u)
+{
+    boost::format message("Type mismatch %1% v %2%");
+    message % toString(t) % toString(u);
+    throw Exception(message);
+}
+
+// True when the schema of the container of type T held by datum has the
+// same name as schema node n.
+template <typename T>
+bool hasSameName(const GenericDatum& datum, const NodePtr& n)
+{
+    return datum.value<T>().schema()->name() == n->name();
+}
+
+// Throws a type-mismatch Exception unless the container of type T held by
+// datum was built from exactly the schema node n.
+template <typename T>
+void assertSameType(const GenericDatum& datum, const NodePtr& n)
+{
+    const NodePtr& held = datum.value<T>().schema();
+    if (held == n) {
+        return;
+    }
+    typeMismatch(held->type(), n->type());
+}
+
+// Throws a type-mismatch Exception unless datum can be written for schema
+// node n: the type tags must agree and, for fixed, record and enum, the
+// datum must have been built from this very node. Any type not listed
+// below (a union, for instance) is always reported as a mismatch.
+static void assertType(const GenericDatum& datum, const NodePtr& n)
+{
+    const Type expected = n->type();
+    if (datum.type() != expected) {
+        typeMismatch(datum.type(), expected);
+    }
+    switch (expected) {
+    case AVRO_NULL:
+    case AVRO_BOOL:
+    case AVRO_INT:
+    case AVRO_LONG:
+    case AVRO_FLOAT:
+    case AVRO_DOUBLE:
+    case AVRO_STRING:
+    case AVRO_BYTES:
+    case AVRO_ARRAY:
+    case AVRO_MAP:
+        return;
+    case AVRO_FIXED:
+        assertSameType<GenericFixed>(datum, n);
+        return;
+    case AVRO_RECORD:
+        assertSameType<GenericRecord>(datum, n);
+        return;
+    case AVRO_ENUM:
+        assertSameType<GenericEnum>(datum, n);
+        return;
+    default:
+        break;
+    }
+    typeMismatch(datum.type(), expected);
+}
+
+// Remembers the schema to write against and the encoder to write to.
+GenericWriter::GenericWriter(const ValidSchema& s, const EncoderPtr& encoder)
+    : schema_(s),
+      encoder_(encoder)
+{
+}
+
+// Writes datum as a value for the root of the schema.
+void GenericWriter::write(const GenericDatum& datum) const
+{
+    const NodePtr& root = schema_.root();
+    write(datum, root, *encoder_);
+}
+
+// Returns the index of the first branch of union node n that datum can be
+// written as. A branch that is a symbolic reference is followed to the
+// node it names before it is compared; otherwise a named type referred to
+// from inside a union (as in a recursive schema) could never be matched.
+// Fixed, record and enum branches must also agree with the datum on the
+// schema name. Throws Exception if no branch matches.
+static size_t selectBranch(const GenericDatum& datum, const NodePtr& n)
+{
+    size_t c = n->leaves();
+    for (size_t i = 0; i < c; ++i) {
+        NodePtr nn = n->leafAt(i);
+        if (nn->type() == AVRO_SYMBOLIC) {
+            nn = static_cast<NodeSymbolic&>(*nn).getNode();
+        }
+        if (datum.type() == nn->type()) {
+            switch (datum.type()) {
+            case AVRO_FIXED:
+                if (hasSameName<GenericFixed>(datum, nn)) return i;
+                break;
+            case AVRO_RECORD:
+                if (hasSameName<GenericRecord>(datum, nn)) return i;
+                break;
+            case AVRO_ENUM:
+                if (hasSameName<GenericEnum>(datum, nn)) return i;
+                break;
+            default:
+                return i;
+            }
+        }
+    }
+    ostringstream oss;
+    n->printJson(oss, 0);
+    throw Exception(boost::format("No match for %1% in %2%") %
+        toString(datum.type()) % oss.str());
+}
+
+// Checks that datum v can be stored where schema node n is expected, as
+// GenericRecord::setFieldAt requires. This member is declared in
+// Generic.hh but had no definition. The node is reduced the same way
+// GenericWriter::write reduces it: a union to its matching branch, a
+// symbolic reference to the node it names. Throws Exception on mismatch.
+void GenericContainer::assertSameType(const GenericDatum& v, const NodePtr& n)
+{
+    NodePtr nn = n;
+    if (nn->type() == AVRO_UNION) {
+        nn = nn->leafAt(selectBranch(v, nn));
+    }
+    if (nn->type() == AVRO_SYMBOLIC) {
+        nn = static_cast<NodeSymbolic&>(*nn).getNode();
+    }
+    // Qualified: the unqualified name would find the three-argument
+    // member GenericContainer::assertType instead.
+    avro::assertType(v, nn);
+}
+
+// Encodes datum, described by schema node n, to e, recursing into
+// records, arrays and maps. For a union the matching branch is chosen and
+// its index encoded first; a symbolic node is followed to the node it
+// names. Throws Exception if the datum does not fit the schema or the
+// type is unknown.
+void GenericWriter::write(const GenericDatum& datum,
+    const NodePtr& n, Encoder& e)
+{
+    NodePtr node = n;
+    if (node->type() == AVRO_UNION) {
+        const size_t branch = selectBranch(datum, node);
+        e.encodeUnionIndex(branch);
+        node = node->leafAt(branch);
+    }
+    if (node->type() == AVRO_SYMBOLIC) {
+        node = static_cast<NodeSymbolic&>(*node).getNode();
+    }
+    assertType(datum, node);
+
+    switch (node->type()) {
+    case AVRO_NULL:
+        e.encodeNull();
+        break;
+    case AVRO_BOOL:
+        e.encodeBool(datum.value<bool>());
+        break;
+    case AVRO_INT:
+        e.encodeInt(datum.value<int32_t>());
+        break;
+    case AVRO_LONG:
+        e.encodeLong(datum.value<int64_t>());
+        break;
+    case AVRO_FLOAT:
+        e.encodeFloat(datum.value<float>());
+        break;
+    case AVRO_DOUBLE:
+        e.encodeDouble(datum.value<double>());
+        break;
+    case AVRO_STRING:
+        e.encodeString(datum.value<string>());
+        break;
+    case AVRO_BYTES:
+        e.encodeBytes(datum.value<bytes>());
+        break;
+    case AVRO_FIXED:
+        e.encodeFixed(datum.value<GenericFixed>().value());
+        break;
+    case AVRO_RECORD:
+        {
+            const GenericRecord& record = datum.value<GenericRecord>();
+            const size_t fieldCount = node->leaves();
+            for (size_t pos = 0; pos < fieldCount; ++pos) {
+                write(record.fieldAt(pos), node->leafAt(pos), e);
+            }
+        }
+        break;
+    case AVRO_ENUM:
+        e.encodeEnum(datum.value<GenericEnum>().value());
+        break;
+    case AVRO_ARRAY:
+        {
+            const GenericArray::Value& items =
+                datum.value<GenericArray>().value();
+            e.arrayStart();
+            // All items go out as one block; an empty array has none.
+            if (! items.empty()) {
+                e.setItemCount(items.size());
+                for (size_t i = 0; i < items.size(); ++i) {
+                    e.startItem();
+                    write(items[i], node->leafAt(0), e);
+                }
+            }
+            e.arrayEnd();
+        }
+        break;
+    case AVRO_MAP:
+        {
+            const GenericMap::Value& entries =
+                datum.value<GenericMap>().value();
+            e.mapStart();
+            // All entries go out as one block: key, then a value of the
+            // schema's second leaf.
+            if (! entries.empty()) {
+                e.setItemCount(entries.size());
+                for (size_t i = 0; i < entries.size(); ++i) {
+                    e.startItem();
+                    e.encodeString(entries[i].first);
+                    write(entries[i].second, node->leafAt(1), e);
+                }
+            }
+            e.mapEnd();
+        }
+        break;
+    default:
+        throw Exception("Unknown schema type");
+    }
+}
+
+} // namespace avro
Modified: avro/trunk/lang/c++/parser/AvroLex.ll
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/parser/AvroLex.ll?rev=1083246&r1=1083245&r2=1083246&view=diff
==============================================================================
--- avro/trunk/lang/c++/parser/AvroLex.ll (original)
+++ avro/trunk/lang/c++/parser/AvroLex.ll Sat Mar 19 18:14:03 2011
@@ -109,7 +109,7 @@ anytext .*
<READFIELD>\"name\"{delim}\" yy_push_state(READFIELDNAME);
<READFIELD>\} yy_pop_state(); return AVRO_LEX_FIELD_END;
<READFIELD>, return yytext[0];
-<READFIELD>{avrotext}+{delim} yy_push_state(READMETADATA); return AVRO_LEX_METADATA;
+<READFIELD>\"{avrotext}+\"{delim} yy_push_state(READMETADATA); return AVRO_LEX_METADATA;
<READFIELD>{ws} ;
<READFIELDS>\{ yy_push_state(READFIELD); return AVRO_LEX_FIELD;
@@ -158,7 +158,7 @@ anytext .*
<INOBJECT>\"symbols\"{delim}\[ yy_push_state(READSYMBOLS); return AVRO_LEX_SYMBOLS;
<INOBJECT>, return yytext[0];
<INOBJECT>\} yy_pop_state(); return yytext[0];
-<INOBJECT>{avrotext}+{delim} yy_push_state(READMETADATA); return AVRO_LEX_METADATA;
+<INOBJECT>\"{avrotext}+\"{delim} yy_push_state(READMETADATA); return AVRO_LEX_METADATA;
<INOBJECT>{ws} ;
<STARTTYPE>\" yy_pop_state(); yy_push_state(READTYPE);
Modified: avro/trunk/lang/c++/test/CodecTests.cc
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/test/CodecTests.cc?rev=1083246&r1=1083245&r2=1083246&view=diff
==============================================================================
--- avro/trunk/lang/c++/test/CodecTests.cc (original)
+++ avro/trunk/lang/c++/test/CodecTests.cc Sat Mar 19 18:14:03 2011
@@ -22,6 +22,7 @@
#include "Decoder.hh"
#include "Compiler.hh"
#include "ValidSchema.hh"
+#include "Generic.hh"
#include <stdint.h>
#include <vector>
@@ -53,6 +54,14 @@ static const unsigned int count = 10;
* To test Json encoder and decoder, we use the same technqiue with only
* one difference - we use JsonEncoder and JsonDecoder.
*
+ * We also use the same infrastructure to test GenericReader and GenericWriter.
+ * In this case, avro binary is generated in the standard way. It is read
+ * into a GenericDatum, which in turn is written out. This newly serialized
+ * data is decoded in the standard way to check that it is what is written. The
+ * last step won't work if the schema for reading is different from
+ * that for writing. This is because any reordering of fields would have
+ * got fixed by the GenericDatum's decoding and encoding step.
+ *
* For most tests, the data is generated at random.
*/
@@ -611,7 +620,7 @@ void testCodec(const TestData& td) {
<< " schema: " << td.schema
<< " calls: " << td.calls
<< " skip-level: " << skipLevel << std::endl;
- */
+ */
BOOST_TEST_CHECKPOINT("Test: " << testNo << ' '
<< " schema: " << td.schema
<< " calls: " << td.calls
@@ -736,6 +745,128 @@ void testWriterFail(const TestData2& td)
td.incorrectCalls, v, p), Exception);
}
+/**
+ * Round-trips randomly generated data through GenericReader and
+ * GenericWriter with a single schema: encode, read into a GenericDatum,
+ * write it out again, then check the result with the normal decoder test.
+ */
+template<typename CodecFactory>
+void testGeneric(const TestData& td) {
+    static int testNo = 0;
+    ++testNo;
+
+    ValidSchema schema = makeValidSchema(td.schema);
+
+    for (unsigned int round = 0; round < count; ++round) {
+        // Generate the values and their encoding.
+        vector<string> values;
+        auto_ptr<OutputStream> encoded;
+        testEncoder(CodecFactory::newEncoder(schema), td.calls, values,
+            encoded);
+        // dump(*encoded);
+
+        // Read the encoding into a generic datum.
+        DecoderPtr decoder = CodecFactory::newDecoder(schema);
+        auto_ptr<InputStream> encodedIn = memoryInputStream(*encoded);
+        decoder->init(*encodedIn);
+        GenericReader reader(schema, decoder);
+        GenericDatum datum;
+        reader.read(datum);
+
+        // Write the datum back out.
+        EncoderPtr encoder = CodecFactory::newEncoder(schema);
+        auto_ptr<OutputStream> rewritten = memoryOutputStream();
+        encoder->init(*rewritten);
+        GenericWriter writer(schema, encoder);
+        writer.write(datum);
+        encoder->flush();
+
+        BOOST_TEST_CHECKPOINT("Test: " << testNo << ' '
+            << " schema: " << td.schema
+            << " calls: " << td.calls);
+        // The rewritten bytes must decode to the generated values.
+        auto_ptr<InputStream> rewrittenIn = memoryInputStream(*rewritten);
+        testDecoder(CodecFactory::newDecoder(schema), values, *rewrittenIn,
+            td.calls, td.depth);
+    }
+}
+
+/**
+ * Round-trips randomly generated data through a resolving GenericReader
+ * and a GenericWriter: encode with the writer schema, read into a
+ * GenericDatum through schema resolution, write it out with the reader
+ * schema, then check the result against the reader calls.
+ */
+template<typename CodecFactory>
+void testGenericResolving(const TestData3& td) {
+    static int testNo = 0;
+    testNo++;
+
+    BOOST_TEST_CHECKPOINT("Test: " << testNo << ' '
+        << " writer schema: " << td.writerSchema
+        << " writer calls: " << td.writerCalls
+        << " reader schema: " << td.readerSchema
+        << " reader calls: " << td.readerCalls);
+
+    ValidSchema wvs = makeValidSchema(td.writerSchema);
+    ValidSchema rvs = makeValidSchema(td.readerSchema);
+
+    for (unsigned int i = 0; i < count; ++i) {
+        vector<string> v;
+        auto_ptr<OutputStream> p;
+        testEncoder(CodecFactory::newEncoder(wvs), td.writerCalls, v, p);
+        // dump(*p);
+        DecoderPtr d1 = CodecFactory::newDecoder(wvs);
+        auto_ptr<InputStream> in1 = memoryInputStream(*p);
+        d1->init(*in1);
+
+        GenericReader gr(wvs, rvs, d1);
+        GenericDatum datum;
+        gr.read(datum);
+
+        EncoderPtr e2 = CodecFactory::newEncoder(rvs);
+        auto_ptr<OutputStream> ob = memoryOutputStream();
+        e2->init(*ob);
+
+        GenericWriter gw(rvs, e2);
+        gw.write(datum);
+        e2->flush();
+
+        // Label fixed: it used to read " writer-schemai ".
+        BOOST_TEST_CHECKPOINT("Test: " << testNo << ' '
+            << " writer-schema: " << td.writerSchema
+            << " writer-calls: " << td.writerCalls
+            << " reader-schema: " << td.readerSchema
+            << " calls: " << td.readerCalls);
+        auto_ptr<InputStream> in2 = memoryInputStream(*ob);
+        testDecoder(CodecFactory::newDecoder(rvs), v, *in2,
+            td.readerCalls, td.depth);
+    }
+}
+
+/**
+ * Like testGenericResolving, but with explicitly given writer values
+ * instead of random data. Only checks that reading through schema
+ * resolution and writing with the reader schema complete.
+ */
+template<typename CodecFactory>
+void testGenericResolving2(const TestData4& td) {
+    static int testNo = 0;
+    ++testNo;
+
+    BOOST_TEST_CHECKPOINT("Test: " << testNo << ' '
+        << " writer schema: " << td.writerSchema
+        << " writer calls: " << td.writerCalls
+        << " reader schema: " << td.readerSchema
+        << " reader calls: " << td.readerCalls);
+
+    ValidSchema writerSchema = makeValidSchema(td.writerSchema);
+    ValidSchema readerSchema = makeValidSchema(td.readerSchema);
+
+    // Encode the given values with the writer schema.
+    const vector<string> writerValues = mkValues(td.writerValues);
+    auto_ptr<OutputStream> encoded = generate(
+        *CodecFactory::newEncoder(writerSchema), td.writerCalls,
+        writerValues);
+    // dump(*encoded);
+
+    // Read them into a generic datum through schema resolution.
+    DecoderPtr decoder = CodecFactory::newDecoder(writerSchema);
+    auto_ptr<InputStream> encodedIn = memoryInputStream(*encoded);
+    decoder->init(*encodedIn);
+    GenericReader reader(writerSchema, readerSchema, decoder);
+    GenericDatum datum;
+    reader.read(datum);
+
+    // Write the datum out with the reader schema.
+    EncoderPtr encoder = CodecFactory::newEncoder(readerSchema);
+    auto_ptr<OutputStream> rewritten = memoryOutputStream();
+    encoder->init(*rewritten);
+    GenericWriter writer(readerSchema, encoder);
+    writer.write(datum);
+    encoder->flush();
+    // We cannot verify with the reader calls because they are for
+    // the resolving decoder and hence could be in a different order than
+    // the "normal" data.
+}
+
+
static const TestData data[] = {
{ "\"null\"", "N", 1 },
{ "\"boolean\"", "B", 1 },
@@ -1262,6 +1393,10 @@ void add_tests(boost::unit_test::test_su
testCodecResolving2, data4);
ADD_TESTS(ts, ValidatingEncoderResolvingDecoderFactory,
testCodecResolving2, data4);
+
+ ADD_TESTS(ts, ValidatingCodecFactory, testGeneric, data);
+ ADD_TESTS(ts, ValidatingCodecFactory, testGenericResolving, data3);
+ ADD_TESTS(ts, ValidatingCodecFactory, testGenericResolving2, data4);
}
} // namespace parsing