You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by sb...@apache.org on 2009/11/17 23:34:16 UTC

svn commit: r881579 - in /hadoop/avro/trunk: ./ src/c++/api/ src/c++/impl/ src/c++/test/

Author: sbanacho
Date: Tue Nov 17 22:34:15 2009
New Revision: 881579

URL: http://svn.apache.org/viewvc?rev=881579&view=rev
Log:
AVRO-205. APIs for checking schema resolution. 

Added:
    hadoop/avro/trunk/src/c++/api/SchemaResolution.hh
Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/src/c++/api/Exception.hh
    hadoop/avro/trunk/src/c++/api/Node.hh
    hadoop/avro/trunk/src/c++/api/NodeImpl.hh
    hadoop/avro/trunk/src/c++/api/Reader.hh
    hadoop/avro/trunk/src/c++/api/SymbolMap.hh
    hadoop/avro/trunk/src/c++/impl/NodeImpl.cc
    hadoop/avro/trunk/src/c++/impl/Types.cc
    hadoop/avro/trunk/src/c++/test/unittest.cc

Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue Nov 17 22:34:15 2009
@@ -68,6 +68,8 @@
 
     AVRO-204. Change the way symbolic references are tracked. (sbanacho)
 
+    AVRO-205. APIs for checking schema resolution. (sbanacho)
+
   OPTIMIZATIONS
 
     AVRO-172. More efficient schema processing (massie)

Modified: hadoop/avro/trunk/src/c++/api/Exception.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Exception.hh?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Exception.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Exception.hh Tue Nov 17 22:34:15 2009
@@ -31,7 +31,7 @@
 {
   public:
 
-    Exception(const std::string msg) :
+    Exception(const std::string &msg) :
         std::runtime_error(msg)
     { }
 

Modified: hadoop/avro/trunk/src/c++/api/Node.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Node.hh?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Node.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Node.hh Tue Nov 17 22:34:15 2009
@@ -25,6 +25,7 @@
 
 #include "Exception.hh"
 #include "Types.hh"
+#include "SchemaResolution.hh"
 
 namespace avro {
 
@@ -54,7 +55,6 @@
 
     Node(Type type) :
         type_(type),
-        refCount_(0),
         locked_(false)
     {}
 
@@ -105,6 +105,8 @@
 
     virtual bool isValid() const = 0;
 
+    virtual SchemaResolution resolve(const Node &reader) const = 0;
+
     virtual void printJson(std::ostream &os, int depth) const = 0;
 
     virtual void printBasicInfo(std::ostream &os) const = 0;
@@ -131,7 +133,6 @@
   private:
 
     const Type type_;
-    int refCount_;
     bool locked_;
 };
 

Modified: hadoop/avro/trunk/src/c++/api/NodeImpl.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/NodeImpl.hh?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/NodeImpl.hh (original)
+++ hadoop/avro/trunk/src/c++/api/NodeImpl.hh Tue Nov 17 22:34:15 2009
@@ -118,6 +118,8 @@
 
     void setLeafToSymbolic(int index, const NodePtr &node);
    
+    SchemaResolution furtherResolution(const Node &node) const;
+
     NameConcept nameAttribute_;
     LeavesConcept leafAttributes_;
     LeafNamesConcept leafNameAttributes_;
@@ -156,6 +158,8 @@
         NodeImplPrimitive(type)
     { }
 
+    SchemaResolution resolve(const Node &reader)  const;
+
     void printJson(std::ostream &os, int depth) const;
 
     bool isValid() const {
@@ -175,6 +179,8 @@
         NodeImplSymbolic(AVRO_SYMBOLIC, name, node, NoLeafNames(), NoSize())
     { }
 
+    SchemaResolution resolve(const Node &reader)  const;
+
     void printJson(std::ostream &os, int depth) const;
 
     bool isValid() const {
@@ -195,6 +201,8 @@
         NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, NoSize())
     { }
 
+    SchemaResolution resolve(const Node &reader)  const;
+
     void printJson(std::ostream &os, int depth) const;
 
     bool isValid() const {
@@ -218,6 +226,8 @@
         NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoSize())
     { }
 
+    SchemaResolution resolve(const Node &reader)  const;
+
     void printJson(std::ostream &os, int depth) const;
 
     bool isValid() const {
@@ -240,6 +250,8 @@
         NodeImplArray(AVRO_ARRAY, NoName(), items, NoLeafNames(), NoSize())
     { }
 
+    SchemaResolution resolve(const Node &reader)  const;
+
     void printJson(std::ostream &os, int depth) const;
 
     bool isValid() const {
@@ -269,6 +281,8 @@
         std::swap(leafAttributes_.get(0), leafAttributes_.get(1));
     }
 
+    SchemaResolution resolve(const Node &reader)  const;
+
     void printJson(std::ostream &os, int depth) const;
 
     bool isValid() const {
@@ -288,6 +302,8 @@
         NodeImplUnion(AVRO_UNION, NoName(), types, NoLeafNames(), NoSize())
     { }
 
+    SchemaResolution resolve(const Node &reader)  const;
+
     void printJson(std::ostream &os, int depth) const;
 
     bool isValid() const {
@@ -307,6 +323,8 @@
         NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), size)
     { }
 
+    SchemaResolution resolve(const Node &reader)  const;
+
     void printJson(std::ostream &os, int depth) const;
 
     bool isValid() const {

Modified: hadoop/avro/trunk/src/c++/api/Reader.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Reader.hh?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Reader.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Reader.hh Tue Nov 17 22:34:15 2009
@@ -81,6 +81,7 @@
 
     void readValue(std::string &val) {
         int64_t size = readSize();
+        val.clear();
         val.reserve(size);
         uint8_t bval;
         for(size_t bytes = 0; bytes < static_cast<size_t>(size); bytes++) {

Added: hadoop/avro/trunk/src/c++/api/SchemaResolution.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/SchemaResolution.hh?rev=881579&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/SchemaResolution.hh (added)
+++ hadoop/avro/trunk/src/c++/api/SchemaResolution.hh Tue Nov 17 22:34:15 2009
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef avro_SchemaResolution_hh__
+#define avro_SchemaResolution_hh__
+
+namespace avro {
+
+
+enum SchemaResolution {
+
+    /// The schemas definitely do not match
+    
+    RESOLVE_NO_MATCH, 
+
+    /// The schemas match at a cursory level
+    ///
+    /// For records and enums, this means the name is the same, but it does not
+    /// necessarily mean that every symbol or field is an exact match.
+    
+    RESOLVE_MATCH,    
+
+    /// For primitives, a match may occur if the type is promotable.  This means that the
+    /// writer matches the reader if the writer's type is promoted to the specified type.
+    
+    //@{
+    
+    RESOLVE_PROMOTABLE_TO_LONG,
+    RESOLVE_PROMOTABLE_TO_FLOAT,
+    RESOLVE_PROMOTABLE_TO_DOUBLE,
+
+    //@}
+
+};
+
+} // namespace avro 
+
+#endif

Modified: hadoop/avro/trunk/src/c++/api/SymbolMap.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/SymbolMap.hh?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/SymbolMap.hh (original)
+++ hadoop/avro/trunk/src/c++/api/SymbolMap.hh Tue Nov 17 22:34:15 2009
@@ -20,6 +20,7 @@
 #define avro_SymbolMap_hh__
 
 #include <map>
+#include <boost/noncopyable.hpp>
 
 #include "Node.hh"
 #include "Schema.hh"
@@ -32,7 +33,7 @@
 /// implements the symbolic name to node mapping.
 ///
 
-class SymbolMap 
+class SymbolMap : private boost::noncopyable
 {
 
   public:

Modified: hadoop/avro/trunk/src/c++/impl/NodeImpl.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/NodeImpl.cc?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/NodeImpl.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/NodeImpl.cc Tue Nov 17 22:34:15 2009
@@ -21,7 +21,172 @@
 
 namespace avro {
 
-/// Wrap an indentation in a struct for ostream operator<< 
+
+template < class A, class B, class C, class D >
+SchemaResolution 
+NodeImpl<A,B,C,D>::furtherResolution(const Node &reader) const
+{
+    SchemaResolution match = RESOLVE_NO_MATCH;
+
+    if(reader.type() == AVRO_SYMBOLIC) {
+    
+        // resolve the symbolic type, and check again
+        const NodePtr &node = reader.leafAt(0);
+        match = resolve(*node);
+    }
+    else if(reader.type() == AVRO_UNION) {
+
+        // in this case, need to see if there is an exact match for the
+        // writer's type, or if not, the first one that can be promoted to a
+        // match
+        
+        for(size_t i= 0; i < reader.leaves(); ++i)  {
+
+            const NodePtr &node = reader.leafAt(i);
+            SchemaResolution thisMatch = resolve(*node);
+
+            // if matched then the search is done
+            if(thisMatch == RESOLVE_MATCH) {
+                match = thisMatch;
+                break;
+            }
+
+            // thisMatch is either no match or promotable; record it only if no
+            // promotable result has been recorded yet, so the first one found wins
+            if (match == RESOLVE_NO_MATCH) {
+                match = thisMatch;
+            }
+        }
+    }
+
+    return match;
+}
+
+SchemaResolution 
+NodePrimitive::resolve(const Node &reader) const
+{
+    if(type() == reader.type()) {
+        return RESOLVE_MATCH;
+    }
+
+    switch ( type() ) {
+
+      case AVRO_INT:
+
+        if( reader.type() == AVRO_LONG ) { 
+            return RESOLVE_PROMOTABLE_TO_LONG;
+        }   
+
+        // fall-through intentional
+
+      case AVRO_LONG:
+ 
+        if (reader.type() == AVRO_FLOAT) {
+            return RESOLVE_PROMOTABLE_TO_FLOAT;
+        }   
+
+        // fall-through intentional
+
+      case AVRO_FLOAT:
+
+        if (reader.type() == AVRO_DOUBLE) {
+            return RESOLVE_PROMOTABLE_TO_DOUBLE;
+        }   
+
+      default:
+        break;
+    }   
+
+    return furtherResolution(reader);
+}
+
+SchemaResolution 
+NodeRecord::resolve(const Node &reader) const
+{
+    if(reader.type() == AVRO_RECORD) {
+        if(name() == reader.name()) {
+            return RESOLVE_MATCH;
+        }
+    }
+    return furtherResolution(reader);
+}
+
+SchemaResolution 
+NodeEnum::resolve(const Node &reader) const
+{
+    if(reader.type() == AVRO_ENUM) {
+        return (name() == reader.name()) ? RESOLVE_MATCH : RESOLVE_NO_MATCH;
+    }
+    return furtherResolution(reader);
+}
+
+SchemaResolution 
+NodeArray::resolve(const Node &reader) const
+{
+    if(reader.type() == AVRO_ARRAY) {
+        const NodePtr &arrayType = leafAt(0);
+        return arrayType->resolve(*reader.leafAt(0));
+    }
+    return furtherResolution(reader);
+}
+
+SchemaResolution 
+NodeMap::resolve(const Node &reader) const
+{
+    if(reader.type() == AVRO_MAP) {
+        const NodePtr &mapType = leafAt(1);
+        return mapType->resolve(*reader.leafAt(1));
+    }
+    return furtherResolution(reader);
+}
+
+SchemaResolution
+NodeUnion::resolve(const Node &reader) const 
+{
+
+    // If the writer is a union, resolution only needs to occur once the selected
+    // type of the writer is known, so this function is not very helpful.
+    //
+    // In this case, this function reports whether there is a possible match for
+    // any of the writer's types, so it searches type by type and returns the
+    // best match found.
+    
+    SchemaResolution match = RESOLVE_NO_MATCH;
+    for(size_t i=0; i < leaves(); ++i) {
+        const NodePtr &node = leafAt(i);
+        SchemaResolution thisMatch = node->resolve(reader);
+        if(thisMatch == RESOLVE_MATCH) {
+            match = thisMatch;
+            break;
+        }
+        if(match == RESOLVE_NO_MATCH) {
+            match = thisMatch;
+        }
+    }
+    return match;
+}
+
+SchemaResolution 
+NodeFixed::resolve(const Node &reader) const
+{
+    if(reader.type() == AVRO_FIXED) {
+        return (
+                (reader.fixedSize() == fixedSize()) &&
+                (reader.name() == name()) 
+            ) ? 
+            RESOLVE_MATCH : RESOLVE_NO_MATCH;
+    }
+    return furtherResolution(reader);
+}
+
+SchemaResolution 
+NodeSymbolic::resolve(const Node &reader) const
+{
+    const NodePtr &node = leafAt(0);
+    return node->resolve(reader);
+}
+
+// Wrap an indentation in a struct for ostream operator<< 
 struct indent { 
     indent(int depth) :
         d(depth)

Modified: hadoop/avro/trunk/src/c++/impl/Types.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Types.cc?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Types.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/Types.cc Tue Nov 17 22:34:15 2009
@@ -61,5 +61,11 @@
     return os;
 }
 
+std::ostream &operator<< (std::ostream &os, const Null &)
+{
+    os << "(null value)";
+    return os;
+}
+
 } // namespace avro
 

Modified: hadoop/avro/trunk/src/c++/test/unittest.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/test/unittest.cc?rev=881579&r1=881578&r2=881579&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/test/unittest.cc (original)
+++ hadoop/avro/trunk/src/c++/test/unittest.cc Tue Nov 17 22:34:15 2009
@@ -30,6 +30,7 @@
 #include "Parser.hh"
 #include "SymbolMap.hh"
 #include "Compiler.hh"
+#include "SchemaResolution.hh"
 
 #include "AvroSerialize.hh"
 
@@ -642,6 +643,124 @@
     test.add( BOOST_CLASS_TEST_CASE( &T::test, newtest ));
 }
 
+struct TestResolution
+{
+    TestResolution() :
+        int_(IntSchema()), 
+        long_(LongSchema()),
+        bool_(BoolSchema()), 
+        float_(FloatSchema()), 
+        double_(DoubleSchema()),
+
+        mapOfInt_(MapSchema(IntSchema())),
+        mapOfDouble_(MapSchema(DoubleSchema())),
+
+        arrayOfLong_(ArraySchema(LongSchema())),
+        arrayOfFloat_(ArraySchema(FloatSchema()))
+    {
+        {
+            EnumSchema one("one");
+            one.addSymbol("X");
+            enumOne_.setSchema(one);
+
+            EnumSchema two("two");
+            two.addSymbol("Y");
+            enumTwo_.setSchema(two);
+        }
+    
+        {
+            UnionSchema one;
+            one.addType(IntSchema());
+            one.addType(FloatSchema());
+            unionOne_.setSchema(one);
+
+            UnionSchema two;
+            two.addType(IntSchema());
+            two.addType(DoubleSchema());
+            unionTwo_.setSchema(two);
+        }
+    }
+
+    SchemaResolution resolve(const ValidSchema &writer, const ValidSchema &reader)
+    {
+        return writer.root()->resolve(*reader.root());
+    }
+
+    void test() 
+    {
+        std::cout << "TestResolution\n";
+
+        BOOST_CHECK_EQUAL(resolve(long_, long_), RESOLVE_MATCH); 
+        BOOST_CHECK_EQUAL(resolve(long_, bool_), RESOLVE_NO_MATCH); 
+        BOOST_CHECK_EQUAL(resolve(bool_, long_), RESOLVE_NO_MATCH); 
+
+        BOOST_CHECK_EQUAL(resolve(int_, long_), RESOLVE_PROMOTABLE_TO_LONG); 
+        BOOST_CHECK_EQUAL(resolve(long_, int_), RESOLVE_NO_MATCH); 
+
+        BOOST_CHECK_EQUAL(resolve(int_, float_), RESOLVE_PROMOTABLE_TO_FLOAT); 
+        BOOST_CHECK_EQUAL(resolve(float_, int_), RESOLVE_NO_MATCH); 
+
+        BOOST_CHECK_EQUAL(resolve(int_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); 
+        BOOST_CHECK_EQUAL(resolve(double_, int_), RESOLVE_NO_MATCH); 
+
+        BOOST_CHECK_EQUAL(resolve(long_, float_), RESOLVE_PROMOTABLE_TO_FLOAT); 
+        BOOST_CHECK_EQUAL(resolve(float_, long_), RESOLVE_NO_MATCH); 
+
+        BOOST_CHECK_EQUAL(resolve(long_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); 
+        BOOST_CHECK_EQUAL(resolve(double_, long_), RESOLVE_NO_MATCH); 
+
+        BOOST_CHECK_EQUAL(resolve(float_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); 
+        BOOST_CHECK_EQUAL(resolve(double_, float_), RESOLVE_NO_MATCH); 
+
+        BOOST_CHECK_EQUAL(resolve(int_, mapOfInt_), RESOLVE_NO_MATCH);
+        BOOST_CHECK_EQUAL(resolve(mapOfInt_, int_), RESOLVE_NO_MATCH);
+
+        BOOST_CHECK_EQUAL(resolve(mapOfInt_, mapOfInt_), RESOLVE_MATCH);
+        BOOST_CHECK_EQUAL(resolve(mapOfDouble_, mapOfInt_), RESOLVE_NO_MATCH);
+        BOOST_CHECK_EQUAL(resolve(mapOfInt_, mapOfDouble_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+
+        BOOST_CHECK_EQUAL(resolve(long_, arrayOfLong_), RESOLVE_NO_MATCH);
+        BOOST_CHECK_EQUAL(resolve(arrayOfLong_, long_), RESOLVE_NO_MATCH);
+
+        BOOST_CHECK_EQUAL(resolve(arrayOfLong_, arrayOfLong_), RESOLVE_MATCH);
+        BOOST_CHECK_EQUAL(resolve(arrayOfFloat_, arrayOfLong_), RESOLVE_NO_MATCH);
+        BOOST_CHECK_EQUAL(resolve(arrayOfLong_, arrayOfFloat_), RESOLVE_PROMOTABLE_TO_FLOAT);
+
+        BOOST_CHECK_EQUAL(resolve(enumOne_, enumOne_), RESOLVE_MATCH);
+        BOOST_CHECK_EQUAL(resolve(enumOne_, enumTwo_), RESOLVE_NO_MATCH);
+
+        BOOST_CHECK_EQUAL(resolve(float_, unionOne_), RESOLVE_MATCH);
+        BOOST_CHECK_EQUAL(resolve(double_, unionOne_), RESOLVE_NO_MATCH);
+        BOOST_CHECK_EQUAL(resolve(float_, unionTwo_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+
+        BOOST_CHECK_EQUAL(resolve(unionOne_, float_), RESOLVE_MATCH);
+        BOOST_CHECK_EQUAL(resolve(unionOne_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+        BOOST_CHECK_EQUAL(resolve(unionTwo_, float_), RESOLVE_PROMOTABLE_TO_FLOAT);
+        BOOST_CHECK_EQUAL(resolve(unionOne_, unionTwo_), RESOLVE_MATCH);
+    }
+
+  private:
+
+    ValidSchema int_;
+    ValidSchema long_;
+    ValidSchema bool_;
+    ValidSchema float_;
+    ValidSchema double_;
+
+    ValidSchema mapOfInt_;
+    ValidSchema mapOfDouble_;
+
+    ValidSchema arrayOfLong_;
+    ValidSchema arrayOfFloat_;
+
+    ValidSchema enumOne_;
+    ValidSchema enumTwo_;
+
+    ValidSchema unionOne_;
+    ValidSchema unionTwo_;
+};
+
+
 boost::unit_test::test_suite*
 init_unit_test_suite( int argc, char* argv[] ) 
 {
@@ -655,6 +774,7 @@
     addTestCase<TestNested>(*test);
     addTestCase<TestGenerated>(*test);
     addTestCase<TestBadStuff>(*test);
+    addTestCase<TestResolution>(*test);
 
     return test;
 }