You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by sb...@apache.org on 2009/11/17 23:34:16 UTC
svn commit: r881579 - in /hadoop/avro/trunk: ./ src/c++/api/ src/c++/impl/
src/c++/test/
Author: sbanacho
Date: Tue Nov 17 22:34:15 2009
New Revision: 881579
URL: http://svn.apache.org/viewvc?rev=881579&view=rev
Log:
AVRO-205. APIs for checking schema resolution.
Added:
hadoop/avro/trunk/src/c++/api/SchemaResolution.hh
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/src/c++/api/Exception.hh
hadoop/avro/trunk/src/c++/api/Node.hh
hadoop/avro/trunk/src/c++/api/NodeImpl.hh
hadoop/avro/trunk/src/c++/api/Reader.hh
hadoop/avro/trunk/src/c++/api/SymbolMap.hh
hadoop/avro/trunk/src/c++/impl/NodeImpl.cc
hadoop/avro/trunk/src/c++/impl/Types.cc
hadoop/avro/trunk/src/c++/test/unittest.cc
Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue Nov 17 22:34:15 2009
@@ -68,6 +68,8 @@
AVRO-204. Change the way symbolic references are tracked. (sbanacho)
+ AVRO-205. APIs for checking schema resolution. (sbanacho)
+
OPTIMIZATIONS
AVRO-172. More efficient schema processing (massie)
Modified: hadoop/avro/trunk/src/c++/api/Exception.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Exception.hh?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Exception.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Exception.hh Tue Nov 17 22:34:15 2009
@@ -31,7 +31,7 @@
{
public:
- Exception(const std::string msg) :
+ Exception(const std::string &msg) :
std::runtime_error(msg)
{ }
Modified: hadoop/avro/trunk/src/c++/api/Node.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Node.hh?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Node.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Node.hh Tue Nov 17 22:34:15 2009
@@ -25,6 +25,7 @@
#include "Exception.hh"
#include "Types.hh"
+#include "SchemaResolution.hh"
namespace avro {
@@ -54,7 +55,6 @@
Node(Type type) :
type_(type),
- refCount_(0),
locked_(false)
{}
@@ -105,6 +105,8 @@
virtual bool isValid() const = 0;
+ virtual SchemaResolution resolve(const Node &reader) const = 0;
+
virtual void printJson(std::ostream &os, int depth) const = 0;
virtual void printBasicInfo(std::ostream &os) const = 0;
@@ -131,7 +133,6 @@
private:
const Type type_;
- int refCount_;
bool locked_;
};
Modified: hadoop/avro/trunk/src/c++/api/NodeImpl.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/NodeImpl.hh?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/NodeImpl.hh (original)
+++ hadoop/avro/trunk/src/c++/api/NodeImpl.hh Tue Nov 17 22:34:15 2009
@@ -118,6 +118,8 @@
void setLeafToSymbolic(int index, const NodePtr &node);
+ SchemaResolution furtherResolution(const Node &node) const;
+
NameConcept nameAttribute_;
LeavesConcept leafAttributes_;
LeafNamesConcept leafNameAttributes_;
@@ -156,6 +158,8 @@
NodeImplPrimitive(type)
{ }
+ SchemaResolution resolve(const Node &reader) const;
+
void printJson(std::ostream &os, int depth) const;
bool isValid() const {
@@ -175,6 +179,8 @@
NodeImplSymbolic(AVRO_SYMBOLIC, name, node, NoLeafNames(), NoSize())
{ }
+ SchemaResolution resolve(const Node &reader) const;
+
void printJson(std::ostream &os, int depth) const;
bool isValid() const {
@@ -195,6 +201,8 @@
NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, NoSize())
{ }
+ SchemaResolution resolve(const Node &reader) const;
+
void printJson(std::ostream &os, int depth) const;
bool isValid() const {
@@ -218,6 +226,8 @@
NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoSize())
{ }
+ SchemaResolution resolve(const Node &reader) const;
+
void printJson(std::ostream &os, int depth) const;
bool isValid() const {
@@ -240,6 +250,8 @@
NodeImplArray(AVRO_ARRAY, NoName(), items, NoLeafNames(), NoSize())
{ }
+ SchemaResolution resolve(const Node &reader) const;
+
void printJson(std::ostream &os, int depth) const;
bool isValid() const {
@@ -269,6 +281,8 @@
std::swap(leafAttributes_.get(0), leafAttributes_.get(1));
}
+ SchemaResolution resolve(const Node &reader) const;
+
void printJson(std::ostream &os, int depth) const;
bool isValid() const {
@@ -288,6 +302,8 @@
NodeImplUnion(AVRO_UNION, NoName(), types, NoLeafNames(), NoSize())
{ }
+ SchemaResolution resolve(const Node &reader) const;
+
void printJson(std::ostream &os, int depth) const;
bool isValid() const {
@@ -307,6 +323,8 @@
NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), size)
{ }
+ SchemaResolution resolve(const Node &reader) const;
+
void printJson(std::ostream &os, int depth) const;
bool isValid() const {
Modified: hadoop/avro/trunk/src/c++/api/Reader.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Reader.hh?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Reader.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Reader.hh Tue Nov 17 22:34:15 2009
@@ -81,6 +81,7 @@
void readValue(std::string &val) {
int64_t size = readSize();
+ val.clear();
val.reserve(size);
uint8_t bval;
for(size_t bytes = 0; bytes < static_cast<size_t>(size); bytes++) {
Added: hadoop/avro/trunk/src/c++/api/SchemaResolution.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/SchemaResolution.hh?rev=881579&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/SchemaResolution.hh (added)
+++ hadoop/avro/trunk/src/c++/api/SchemaResolution.hh Tue Nov 17 22:34:15 2009
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef avro_SchemaResolution_hh__
+#define avro_SchemaResolution_hh__
+
+namespace avro {
+
+/// Result of resolving a writer's schema node against a reader's schema node.
+enum SchemaResolution {
+
+ /// The schemas definitely do not match.
+
+ RESOLVE_NO_MATCH,
+
+ /// The schemas match at a cursory level.
+ ///
+ /// For records and enums, this means the name is the same, but it does not
+ /// necessarily mean that every symbol or field is an exact match.
+
+ RESOLVE_MATCH,
+
+ /// For primitives, a match may still occur if the writer's type is promotable: the
+ /// writer matches the reader once the writer's type is promoted to the specified type.
+
+ //@{
+
+ RESOLVE_PROMOTABLE_TO_LONG,
+ RESOLVE_PROMOTABLE_TO_FLOAT,
+ RESOLVE_PROMOTABLE_TO_DOUBLE,
+
+ //@}
+
+};
+
+} // namespace avro
+
+#endif
Modified: hadoop/avro/trunk/src/c++/api/SymbolMap.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/SymbolMap.hh?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/SymbolMap.hh (original)
+++ hadoop/avro/trunk/src/c++/api/SymbolMap.hh Tue Nov 17 22:34:15 2009
@@ -20,6 +20,7 @@
#define avro_SymbolMap_hh__
#include <map>
+#include <boost/noncopyable.hpp>
#include "Node.hh"
#include "Schema.hh"
@@ -32,7 +33,7 @@
/// implements the symbolic name to node mapping.
///
-class SymbolMap
+class SymbolMap : private boost::noncopyable
{
public:
Modified: hadoop/avro/trunk/src/c++/impl/NodeImpl.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/NodeImpl.cc?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/NodeImpl.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/NodeImpl.cc Tue Nov 17 22:34:15 2009
@@ -21,7 +21,172 @@
namespace avro {
-/// Wrap an indentation in a struct for ostream operator<<
+/// Fallback shared by the resolve() overrides: unwraps a symbolic reader, or searches the branches of a union reader.
+template < class A, class B, class C, class D >
+SchemaResolution
+NodeImpl<A,B,C,D>::furtherResolution(const Node &reader) const
+{
+ SchemaResolution match = RESOLVE_NO_MATCH;
+ // any reader type other than symbolic or union leaves the result as RESOLVE_NO_MATCH
+ if(reader.type() == AVRO_SYMBOLIC) {
+
+ // resolve the symbolic type, and check again
+ const NodePtr &node = reader.leafAt(0);
+ match = resolve(*node);
+ }
+ else if(reader.type() == AVRO_UNION) {
+
+ // in this case, need to see if there is an exact match for the
+ // writer's type, or if not, the first one that can be promoted to a
+ // match
+
+ for(size_t i= 0; i < reader.leaves(); ++i) {
+
+ const NodePtr &node = reader.leafAt(i);
+ SchemaResolution thisMatch = resolve(*node);
+
+ // if matched then the search is done
+ if(thisMatch == RESOLVE_MATCH) {
+ match = thisMatch;
+ break;
+ }
+
+ // thisMatch is now either no match or a promotion; only the first promotion
+ // seen is kept, while a later exact match (above) still overrides it
+ if (match == RESOLVE_NO_MATCH) {
+ match = thisMatch;
+ }
+ }
+ }
+
+ return match;
+}
+/// Resolves a primitive writer: exact type match first, then numeric promotion (int -> long/float/double, long -> float/double, float -> double), then furtherResolution().
+SchemaResolution
+NodePrimitive::resolve(const Node &reader) const
+{
+ if(type() == reader.type()) {
+ return RESOLVE_MATCH;
+ }
+ // the cases below cascade so that each writer type also tries every wider reader type
+ switch ( type() ) {
+
+ case AVRO_INT:
+
+ if( reader.type() == AVRO_LONG ) {
+ return RESOLVE_PROMOTABLE_TO_LONG;
+ }
+
+ // fall-through intentional: an int writer may also be promoted to float or double
+
+ case AVRO_LONG:
+
+ if (reader.type() == AVRO_FLOAT) {
+ return RESOLVE_PROMOTABLE_TO_FLOAT;
+ }
+
+ // fall-through intentional: int and long writers may also be promoted to double
+
+ case AVRO_FLOAT:
+
+ if (reader.type() == AVRO_DOUBLE) {
+ return RESOLVE_PROMOTABLE_TO_DOUBLE;
+ }
+
+ default:
+ break;
+ }
+ // no direct match or promotion; the reader may still be symbolic or a union
+ return furtherResolution(reader);
+}
+/// A record writer matches a record reader of the same name (fields are not compared here); every other case is handed to furtherResolution().
+SchemaResolution
+NodeRecord::resolve(const Node &reader) const
+{
+ if(reader.type() == AVRO_RECORD) {
+ if(name() == reader.name()) {
+ return RESOLVE_MATCH;
+ }
+ }
+ return furtherResolution(reader);
+}
+/// An enum writer is compared with an enum reader by name only (symbols are not compared here); non-enum readers are handed to furtherResolution().
+SchemaResolution
+NodeEnum::resolve(const Node &reader) const
+{
+ if(reader.type() == AVRO_ENUM) {
+ return (name() == reader.name()) ? RESOLVE_MATCH : RESOLVE_NO_MATCH;
+ }
+ return furtherResolution(reader);
+}
+/// Two arrays resolve exactly as their leaf-0 (item) types resolve; non-array readers are handed to furtherResolution().
+SchemaResolution
+NodeArray::resolve(const Node &reader) const
+{
+ if(reader.type() == AVRO_ARRAY) {
+ const NodePtr &arrayType = leafAt(0);
+ return arrayType->resolve(*reader.leafAt(0));
+ }
+ return furtherResolution(reader);
+}
+/// Two maps resolve exactly as their leaf-1 types resolve (presumably the map's value type - confirm against NodeMap's constructor); non-map readers are handed to furtherResolution().
+SchemaResolution
+NodeMap::resolve(const Node &reader) const
+{
+ if(reader.type() == AVRO_MAP) {
+ const NodePtr &mapType = leafAt(1);
+ return mapType->resolve(*reader.leafAt(1));
+ }
+ return furtherResolution(reader);
+}
+/// Reports the best resolution that any branch of a union writer achieves against the reader.
+SchemaResolution
+NodeUnion::resolve(const Node &reader) const
+{
+
+ // If the writer is a union, resolution only needs to occur once the selected
+ // type of the writer is known, so this function is not very helpful.
+ //
+ // Here it reports whether a match is possible for any of the writer's types:
+ // it searches type by type and returns an exact match if any branch has one,
+ // otherwise the first non-matching result that is a promotion (if any).
+
+ SchemaResolution match = RESOLVE_NO_MATCH;
+ for(size_t i=0; i < leaves(); ++i) {
+ const NodePtr &node = leafAt(i);
+ SchemaResolution thisMatch = node->resolve(reader);
+ if(thisMatch == RESOLVE_MATCH) {
+ match = thisMatch;
+ break;
+ }
+ if(match == RESOLVE_NO_MATCH) {
+ match = thisMatch;
+ }
+ }
+ return match;
+}
+/// A fixed writer matches a fixed reader only when both the size and the name agree; non-fixed readers are handed to furtherResolution().
+SchemaResolution
+NodeFixed::resolve(const Node &reader) const
+{
+ if(reader.type() == AVRO_FIXED) {
+ return (
+ (reader.fixedSize() == fixedSize()) &&
+ (reader.name() == name())
+ ) ?
+ RESOLVE_MATCH : RESOLVE_NO_MATCH;
+ }
+ return furtherResolution(reader);
+}
+/// A symbolic writer delegates resolution to its leaf 0, the node the symbol stands for.
+SchemaResolution
+NodeSymbolic::resolve(const Node &reader) const
+{
+ const NodePtr &node = leafAt(0);
+ return node->resolve(reader);
+}
+
+// Wrap an indentation in a struct for ostream operator<<
struct indent {
indent(int depth) :
d(depth)
Modified: hadoop/avro/trunk/src/c++/impl/Types.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Types.cc?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Types.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/Types.cc Tue Nov 17 22:34:15 2009
@@ -61,5 +61,11 @@
return os;
}
+std::ostream &operator<< (std::ostream &os, const Null &)
+{ // The Null argument is unnamed and never read: a fixed placeholder is streamed.
+ os << "(null value)";
+ return os;
+}
+
} // namespace avro
Modified: hadoop/avro/trunk/src/c++/test/unittest.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/test/unittest.cc?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/test/unittest.cc (original)
+++ hadoop/avro/trunk/src/c++/test/unittest.cc Tue Nov 17 22:34:15 2009
@@ -30,6 +30,7 @@
#include "Parser.hh"
#include "SymbolMap.hh"
#include "Compiler.hh"
+#include "SchemaResolution.hh"
#include "AvroSerialize.hh"
@@ -642,6 +643,124 @@
test.add( BOOST_CLASS_TEST_CASE( &T::test, newtest ));
}
+struct TestResolution // Checks Node::resolve() results for pairs of writer/reader schemas.
+{
+ TestResolution() :
+ int_(IntSchema()),
+ long_(LongSchema()),
+ bool_(BoolSchema()),
+ float_(FloatSchema()),
+ double_(DoubleSchema()),
+
+ mapOfInt_(MapSchema(IntSchema())),
+ mapOfDouble_(MapSchema(DoubleSchema())),
+
+ arrayOfLong_(ArraySchema(LongSchema())),
+ arrayOfFloat_(ArraySchema(FloatSchema()))
+ {
+ { // two enums that differ in name (and in their single symbol)
+ EnumSchema one("one");
+ one.addSymbol("X");
+ enumOne_.setSchema(one);
+
+ EnumSchema two("two");
+ two.addSymbol("Y");
+ enumTwo_.setSchema(two);
+ }
+
+ { // two unions sharing int but differing in the second branch (float vs double)
+ UnionSchema one;
+ one.addType(IntSchema());
+ one.addType(FloatSchema());
+ unionOne_.setSchema(one);
+
+ UnionSchema two;
+ two.addType(IntSchema());
+ two.addType(DoubleSchema());
+ unionTwo_.setSchema(two);
+ }
+ }
+ // Resolves the writer schema's root node against the reader schema's root node.
+ SchemaResolution resolve(const ValidSchema &writer, const ValidSchema &reader)
+ {
+ return writer.root()->resolve(*reader.root());
+ }
+
+ void test()
+ {
+ std::cout << "TestResolution\n";
+ // the same primitive matches; long and bool are unrelated in both directions
+ BOOST_CHECK_EQUAL(resolve(long_, long_), RESOLVE_MATCH);
+ BOOST_CHECK_EQUAL(resolve(long_, bool_), RESOLVE_NO_MATCH);
+ BOOST_CHECK_EQUAL(resolve(bool_, long_), RESOLVE_NO_MATCH);
+ // promotion only applies from a writer to a wider reader, never the reverse
+ BOOST_CHECK_EQUAL(resolve(int_, long_), RESOLVE_PROMOTABLE_TO_LONG);
+ BOOST_CHECK_EQUAL(resolve(long_, int_), RESOLVE_NO_MATCH);
+
+ BOOST_CHECK_EQUAL(resolve(int_, float_), RESOLVE_PROMOTABLE_TO_FLOAT);
+ BOOST_CHECK_EQUAL(resolve(float_, int_), RESOLVE_NO_MATCH);
+
+ BOOST_CHECK_EQUAL(resolve(int_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+ BOOST_CHECK_EQUAL(resolve(double_, int_), RESOLVE_NO_MATCH);
+
+ BOOST_CHECK_EQUAL(resolve(long_, float_), RESOLVE_PROMOTABLE_TO_FLOAT);
+ BOOST_CHECK_EQUAL(resolve(float_, long_), RESOLVE_NO_MATCH);
+
+ BOOST_CHECK_EQUAL(resolve(long_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+ BOOST_CHECK_EQUAL(resolve(double_, long_), RESOLVE_NO_MATCH);
+
+ BOOST_CHECK_EQUAL(resolve(float_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+ BOOST_CHECK_EQUAL(resolve(double_, float_), RESOLVE_NO_MATCH);
+ // a primitive and a map never resolve; two maps resolve like the types they hold
+ BOOST_CHECK_EQUAL(resolve(int_, mapOfInt_), RESOLVE_NO_MATCH);
+ BOOST_CHECK_EQUAL(resolve(mapOfInt_, int_), RESOLVE_NO_MATCH);
+
+ BOOST_CHECK_EQUAL(resolve(mapOfInt_, mapOfInt_), RESOLVE_MATCH);
+ BOOST_CHECK_EQUAL(resolve(mapOfDouble_, mapOfInt_), RESOLVE_NO_MATCH);
+ BOOST_CHECK_EQUAL(resolve(mapOfInt_, mapOfDouble_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+ // arrays behave the same way, following their item types
+ BOOST_CHECK_EQUAL(resolve(long_, arrayOfLong_), RESOLVE_NO_MATCH);
+ BOOST_CHECK_EQUAL(resolve(arrayOfLong_, long_), RESOLVE_NO_MATCH);
+
+ BOOST_CHECK_EQUAL(resolve(arrayOfLong_, arrayOfLong_), RESOLVE_MATCH);
+ BOOST_CHECK_EQUAL(resolve(arrayOfFloat_, arrayOfLong_), RESOLVE_NO_MATCH);
+ BOOST_CHECK_EQUAL(resolve(arrayOfLong_, arrayOfFloat_), RESOLVE_PROMOTABLE_TO_FLOAT);
+ // enums resolve by name
+ BOOST_CHECK_EQUAL(resolve(enumOne_, enumOne_), RESOLVE_MATCH);
+ BOOST_CHECK_EQUAL(resolve(enumOne_, enumTwo_), RESOLVE_NO_MATCH);
+ // primitive writer vs union reader: an exact branch wins, else the first promotable branch
+ BOOST_CHECK_EQUAL(resolve(float_, unionOne_), RESOLVE_MATCH);
+ BOOST_CHECK_EQUAL(resolve(double_, unionOne_), RESOLVE_NO_MATCH);
+ BOOST_CHECK_EQUAL(resolve(float_, unionTwo_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+ // union writer: the best result over all of the writer's branches
+ BOOST_CHECK_EQUAL(resolve(unionOne_, float_), RESOLVE_MATCH);
+ BOOST_CHECK_EQUAL(resolve(unionOne_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+ BOOST_CHECK_EQUAL(resolve(unionTwo_, float_), RESOLVE_PROMOTABLE_TO_FLOAT);
+ BOOST_CHECK_EQUAL(resolve(unionOne_, unionTwo_), RESOLVE_MATCH);
+ }
+
+ private:
+
+ ValidSchema int_;
+ ValidSchema long_;
+ ValidSchema bool_;
+ ValidSchema float_;
+ ValidSchema double_;
+
+ ValidSchema mapOfInt_;
+ ValidSchema mapOfDouble_;
+
+ ValidSchema arrayOfLong_;
+ ValidSchema arrayOfFloat_;
+
+ ValidSchema enumOne_;
+ ValidSchema enumTwo_;
+
+ ValidSchema unionOne_;
+ ValidSchema unionTwo_;
+};
+
+
boost::unit_test::test_suite*
init_unit_test_suite( int argc, char* argv[] )
{
@@ -655,6 +774,7 @@
addTestCase<TestNested>(*test);
addTestCase<TestGenerated>(*test);
addTestCase<TestBadStuff>(*test);
+ addTestCase<TestResolution>(*test);
return test;
}