You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by sb...@apache.org on 2009/11/16 19:20:59 UTC

svn commit: r880888 - in /hadoop/avro/trunk: CHANGES.txt src/c++/api/Node.hh src/c++/api/NodeConcepts.hh src/c++/api/NodeImpl.hh src/c++/impl/Schema.cc src/c++/test/unittest.cc

Author: sbanacho
Date: Mon Nov 16 18:20:58 2009
New Revision: 880888

URL: http://svn.apache.org/viewvc?rev=880888&view=rev
Log:
AVRO-197.  Add mapping of name to index for records and enums


Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/src/c++/api/Node.hh
    hadoop/avro/trunk/src/c++/api/NodeConcepts.hh
    hadoop/avro/trunk/src/c++/api/NodeImpl.hh
    hadoop/avro/trunk/src/c++/impl/Schema.cc
    hadoop/avro/trunk/src/c++/test/unittest.cc

Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=880888&r1=880887&r2=880888&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Mon Nov 16 18:20:58 2009
@@ -16,10 +16,6 @@
 
   IMPROVEMENTS
 
-    AVRO-190. Use fixed size C++ types for Avro fixed types. (sbanacho)
-
-    AVRO-180. Enhance code generator script and unit tests. (sbanacho)
-
     AVRO-157. Changes from code review comments for C++. (sbanacho)
 
     AVRO-168. Correct shared library versioning for C implementation (massie)
@@ -53,6 +49,8 @@
 
     AVRO-177. Upgrade Java dependencies to recent versions. (cutting)
 
+    AVRO-180. Enhance code generator script and unit tests. (sbanacho)
+
     AVRO-186. Full read-path interoperability test (massie)
 
     AVRO-187. Move top-level source files into separate directories 
@@ -60,20 +58,20 @@
 
     AVRO-188. Need to update svn ignores (massie)
 
+    AVRO-190. Use fixed size C++ types for Avro fixed types. (sbanacho)
+
     AVRO-192. Improved errors for Java schema parsing problems. (cutting)
 
     AVRO-195. Complex type support for write streams (massie)
 
+    AVRO-197. Add mapping of name to index for records and enums. (sbanacho)
+
   OPTIMIZATIONS
 
     AVRO-172. More efficient schema processing (massie)
 
   BUG FIXES
  
-    AVRO-194. C++ varint encoding buffer too small. (sbanacho)
-
-    AVRO-191. Explicitly include stdint.h for C++. (cutting via sbanacho)
-
     AVRO-176. Safeguard against bad istreams before reading. (sbanacho)
 
     AVRO-141.  Fix a NullPointerException in ReflectData#isRecord().
@@ -98,6 +96,10 @@
     AVRO-171. Fix Java's Protocol#toString() to correctly handle
     forward-references. (cutting)
 
+    AVRO-191. Explicitly include stdint.h for C++. (cutting via sbanacho)
+
+    AVRO-194. C++ varint encoding buffer too small. (sbanacho)
+
 Avro 1.2.0 (14 October 2009)
 
   INCOMPATIBLE CHANGES

Modified: hadoop/avro/trunk/src/c++/api/Node.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Node.hh?rev=880888&r1=880887&r2=880888&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Node.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Node.hh Mon Nov 16 18:20:58 2009
@@ -95,6 +95,7 @@
     }
     virtual size_t names() const = 0;
     virtual const std::string &nameAt(int index) const = 0;
+    virtual bool nameIndex(const std::string &name, size_t &index) const = 0;
 
     void setFixedSize(int size) {
         checkLock();

Modified: hadoop/avro/trunk/src/c++/api/NodeConcepts.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/NodeConcepts.hh?rev=880888&r1=880887&r2=880888&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/NodeConcepts.hh (original)
+++ hadoop/avro/trunk/src/c++/api/NodeConcepts.hh Mon Nov 16 18:20:58 2009
@@ -20,6 +20,7 @@
 #define avro_NodeConcepts_hh__
 
 #include <vector>
+#include <map>
 #include "Exception.hh"
 
 namespace avro {
@@ -166,6 +167,49 @@
 };
 
 
+template<typename T>
+struct NameIndexConcept {
+    // Primary template: no name index is kept, so both operations always throw.
+    bool lookup(const std::string &name, size_t &index) const {
+        throw Exception("Name index does not exist");
+        return false; // unreachable; a bool literal, matching add() below
+    }
+    // Same contract as lookup(): unconditionally throws in this primary template.
+    bool add(const std::string &name, size_t index) {
+        throw Exception("Name index does not exist");
+        return false; // unreachable
+    }
+};
+
+template<>
+struct NameIndexConcept < MultiAttribute<std::string> >
+{
+    typedef std::map<std::string, size_t> IndexMap;
+
+    // Writes the stored index to `index` and returns true when `name` is
+    // present; returns false and leaves `index` untouched otherwise.
+    bool lookup(const std::string &name, size_t &index) const {
+        const IndexMap::const_iterator pos = map_.find(name);
+        const bool found = (pos != map_.end());
+        if(found) {
+            index = pos->second;
+        }
+        return found;
+    }
+
+    // Records name -> index and returns true, or returns false without
+    // changing the map when `name` is already present.
+    bool add(const std::string &name, size_t index) {
+        // map::insert reports through .second whether the key was new
+        return map_.insert(IndexMap::value_type(name, index)).second;
+    }
+
+  private:
+
+    // name -> index entries, keyed and ordered by name
+    IndexMap map_;
+};
+
 } // namespace concepts
 } // namespace avro
 

Modified: hadoop/avro/trunk/src/c++/api/NodeImpl.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/NodeImpl.hh?rev=880888&r1=880887&r2=880888&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/NodeImpl.hh (original)
+++ hadoop/avro/trunk/src/c++/api/NodeImpl.hh Mon Nov 16 18:20:58 2009
@@ -86,6 +86,9 @@
     }
 
     void doAddName(const std::string &name) { 
+        if(! nameIndex_.add(name, leafNameAttributes_.size())) { // size() is read before the append below; presumably the slot the new name will occupy
+            throw Exception("Cannot add duplicate names"); // thrown before leafNameAttributes_ is touched
+        }
         leafNameAttributes_.add(name);
     }
 
@@ -97,6 +100,10 @@
         return leafNameAttributes_.get(index);
     }
 
+    bool nameIndex(const std::string &name, size_t &index) const {
+        return nameIndex_.lookup(name, index); // forwards to the nameIndex_ member; the result and `index` come from its lookup()
+    }
+
     void doSetFixedSize(int size) {
         sizeAttribute_.add(size);
     }
@@ -115,6 +122,7 @@
     LeavesConcept leafAttributes_;
     LeafNamesConcept leafNameAttributes_;
     SizeConcept sizeAttribute_;
+    concepts::NameIndexConcept<LeafNamesConcept> nameIndex_;
 };
 
 typedef concepts::NoAttribute<std::string>     NoName;

Modified: hadoop/avro/trunk/src/c++/impl/Schema.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Schema.cc?rev=880888&r1=880887&r2=880888&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Schema.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/Schema.cc Mon Nov 16 18:20:58 2009
@@ -44,8 +44,11 @@
 void
 RecordSchema::addField(const std::string &name, const Schema &fieldSchema) 
 {
-    node_->addLeaf(fieldSchema.root());
+    // Add the name first: addName() throws if the name is a duplicate, so a
+    // rejected field never reaches the addLeaf() call below.
     node_->addName(name);
+
+    node_->addLeaf(fieldSchema.root());
 }
 
 EnumSchema::EnumSchema(const std::string &name) :

Modified: hadoop/avro/trunk/src/c++/test/unittest.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/test/unittest.cc?rev=880888&r1=880887&r2=880888&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/test/unittest.cc (original)
+++ hadoop/avro/trunk/src/c++/test/unittest.cc Mon Nov 16 18:20:58 2009
@@ -63,6 +63,17 @@
         myenum.addSymbol("one");
         myenum.addSymbol("two");
         myenum.addSymbol("three");
+
+        bool caught = false;
+        try {
+            myenum.addSymbol("three");
+        }
+        catch(Exception &e) {
+            std::cout << "(intentional) exception: " << e.what() << '\n';
+            caught = true;
+        }
+        BOOST_CHECK_EQUAL(caught, true);
+
         record.addField("myenum", myenum); 
 
         UnionSchema onion;
@@ -80,12 +91,55 @@
         record.addField("mybool", BoolSchema());
         FixedSchema fixed(16, "fixed16");
         record.addField("myfixed", fixed);
-        record.addField("mylong", LongSchema());
+
+        caught = false;
+        try {
+            record.addField("mylong", LongSchema());
+        }
+        catch(Exception &e) {
+            std::cout << "(intentional) exception: " << e.what() << '\n';
+            caught = true;
+        }
+        BOOST_CHECK_EQUAL(caught, true);
+
+        record.addField("mylong2", LongSchema());
+
         record.addField("anotherint", intSchema);
 
         schema_.setSchema(record);
     }
 
+    void checkNameLookup() {
+        NodePtr node = schema_.root();
+        // Exercises nameIndex() on the root node and on the leaf found under "myenum".
+        size_t index = 0;
+        bool found = node->nameIndex("mylongxxx", index); // this lookup is expected to fail
+        BOOST_CHECK_EQUAL(found, false);
+
+        found = node->nameIndex("mylong", index); // expected at index 0 of the root node
+        BOOST_CHECK_EQUAL(found, true);
+        BOOST_CHECK_EQUAL(index, 0U);
+
+        found = node->nameIndex("mylong2", index); // expected at index 8 of the root node
+        BOOST_CHECK_EQUAL(found, true);
+        BOOST_CHECK_EQUAL(index, 8U);
+
+        found = node->nameIndex("myenum", index); // index is reused below to fetch the matching leaf
+        BOOST_CHECK_EQUAL(found, true);
+        NodePtr enumNode = node->leafAt(index);
+
+        found = enumNode->nameIndex("one", index); // expected at index 1 of the enum node
+        BOOST_CHECK_EQUAL(found, true);
+        BOOST_CHECK_EQUAL(index, 1U);
+
+        found = enumNode->nameIndex("three", index); // expected at index 3 of the enum node
+        BOOST_CHECK_EQUAL(found, true);
+        BOOST_CHECK_EQUAL(index, 3U);
+
+        found = enumNode->nameIndex("four", index); // this lookup is expected to fail
+        BOOST_CHECK_EQUAL(found, false);
+    }
+
     template<typename Serializer>
     void printUnion(Serializer &s, int path)
     {
@@ -329,6 +383,8 @@
         schema_.toJson(std::cout);
         schema_.toFlatList(std::cout);
 
+        checkNameLookup();
+
         printEncoding();
         printValidatingEncoding(0);
         printValidatingEncoding(1);