You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ma...@apache.org on 2010/01/22 01:38:03 UTC

svn commit: r901950 - in /hadoop/avro/trunk: ./ lang/c/ lang/c/src/ lang/c/tests/

Author: massie
Date: Fri Jan 22 00:38:01 2010
New Revision: 901950

URL: http://svn.apache.org/viewvc?rev=901950&view=rev
Log:
AVRO-364.  Add support for encoding/decoding records

Added:
    hadoop/avro/trunk/lang/c/src/datum_equal.c
    hadoop/avro/trunk/lang/c/src/datum_read.c
    hadoop/avro/trunk/lang/c/src/datum_validate.c
    hadoop/avro/trunk/lang/c/src/datum_write.c
Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/lang/c/src/Makefile.am
    hadoop/avro/trunk/lang/c/src/avro.h
    hadoop/avro/trunk/lang/c/src/datum.c
    hadoop/avro/trunk/lang/c/src/io.c
    hadoop/avro/trunk/lang/c/src/schema.c
    hadoop/avro/trunk/lang/c/src/schema.h
    hadoop/avro/trunk/lang/c/tests/test_avro_data.c
    hadoop/avro/trunk/lang/c/version.sh

Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Fri Jan 22 00:38:01 2010
@@ -238,6 +238,8 @@
     AVRO-362. Add test to ensure Python implementation handles Union schema
     with two fixed types of different names (hammer)
 
+    AVRO-364. Add support for encoding/decoding records (massie)
+
   OPTIMIZATIONS
 
     AVRO-172. More efficient schema processing (massie)

Modified: hadoop/avro/trunk/lang/c/src/Makefile.am
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/Makefile.am?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/Makefile.am (original)
+++ hadoop/avro/trunk/lang/c/src/Makefile.am Fri Jan 22 00:38:01 2010
@@ -6,8 +6,10 @@
 include_HEADERS = avro.h
 
 lib_LTLIBRARIES = libavro.la
-libavro_la_SOURCES = st.c st.h schema.c schema.h schema_printf.c schema_equal.c datum.c datum.h \
-io.c dump.c dump.h encoding_binary.c container_of.h queue.h encoding.h
+libavro_la_SOURCES = st.c st.h schema.c schema.h schema_printf.c schema_equal.c \
+datum.c datum_equal.c datum_validate.c datum_read.c datum_write.c datum.h \
+io.c dump.c dump.h encoding_binary.c \
+container_of.h queue.h encoding.h
 libavro_la_LIBADD = $(top_builddir)/jansson/src/.libs/libjansson.a
 libavro_la_LDFLAGS = \
         -version-info $(LIBAVRO_VERSION) \

Modified: hadoop/avro/trunk/lang/c/src/avro.h
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/avro.h?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/avro.h (original)
+++ hadoop/avro/trunk/lang/c/src/avro.h Fri Jan 22 00:38:01 2010
@@ -154,6 +154,9 @@
 int avro_write(avro_writer_t writer, void *buf, int64_t len);
 int avro_flush(avro_writer_t writer);
 
+void avro_writer_dump(avro_writer_t writer, FILE * fp);
+void avro_reader_dump(avro_reader_t reader, FILE * fp);
+
 void avro_reader_free(avro_reader_t reader);
 void avro_writer_free(avro_writer_t writer);
 

Modified: hadoop/avro/trunk/lang/c/src/datum.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum.c Fri Jan 22 00:38:01 2010
@@ -31,109 +31,6 @@
 	datum->refcount = 1;
 }
 
-static int
-array_equal(struct avro_array_datum_t *a, struct avro_array_datum_t *b)
-{
-	struct avro_array_element_t *a_el, *b_el;
-	if (a->num_elements != b->num_elements) {
-		return 0;
-	}
-	for (a_el = STAILQ_FIRST(&a->els),
-	     b_el = STAILQ_FIRST(&b->els);
-	     !(a_el == NULL && a_el == NULL);
-	     a_el = STAILQ_NEXT(a_el, els), b_el = STAILQ_NEXT(b_el, els)) {
-		if (a_el == NULL || b_el == NULL) {
-			return 0;	/* different number of elements */
-		}
-		if (!avro_datum_equal(a_el->datum, b_el->datum)) {
-			return 0;
-		}
-	}
-	return 1;
-}
-
-struct map_equal_args {
-	int rval;
-	st_table *st;
-};
-
-static int
-map_equal_foreach(char *key, avro_datum_t datum, struct map_equal_args *args)
-{
-	avro_datum_t datum_other = NULL;
-
-	st_lookup(args->st, (st_data_t) key, (st_data_t *) & datum_other);
-	if (!datum_other) {
-		args->rval = 0;
-		return ST_STOP;
-	}
-	if (!avro_datum_equal(datum, datum_other)) {
-		args->rval = 0;
-		return ST_STOP;
-	}
-	return ST_CONTINUE;
-}
-
-static int map_equal(struct avro_map_datum_t *a, struct avro_map_datum_t *b)
-{
-	struct map_equal_args args = { 1, avro_datum_to_map(b)->map };
-	if (a->map->num_entries != b->map->num_entries) {
-		return 0;
-	}
-	st_foreach(avro_datum_to_map(a)->map,
-		   map_equal_foreach, (st_data_t) & args);
-	return args.rval;
-}
-
-int avro_datum_equal(avro_datum_t a, avro_datum_t b)
-{
-	if (!(is_avro_datum(a) && is_avro_datum(b))) {
-		return 0;
-	}
-	if (avro_typeof(a) != avro_typeof(b)) {
-		return 0;
-	}
-	switch (avro_typeof(a)) {
-	case AVRO_STRING:
-		return strcmp(avro_datum_to_string(a)->s,
-			      avro_datum_to_string(b)->s) == 0;
-	case AVRO_BYTES:
-		return (avro_datum_to_bytes(a)->size ==
-			avro_datum_to_bytes(b)->size)
-		    && memcmp(avro_datum_to_bytes(a)->bytes,
-			      avro_datum_to_bytes(b)->bytes,
-			      avro_datum_to_bytes(a)->size) == 0;
-	case AVRO_INT:
-		return avro_datum_to_int(a)->i == avro_datum_to_int(b)->i;
-	case AVRO_LONG:
-		return avro_datum_to_long(a)->l == avro_datum_to_long(b)->l;
-	case AVRO_FLOAT:
-		return avro_datum_to_float(a)->f == avro_datum_to_float(b)->f;
-	case AVRO_DOUBLE:
-		return avro_datum_to_double(a)->d == avro_datum_to_double(b)->d;
-	case AVRO_BOOLEAN:
-		return avro_datum_to_boolean(a)->i ==
-		    avro_datum_to_boolean(b)->i;
-	case AVRO_NULL:
-		return 1;
-	case AVRO_ARRAY:
-		return array_equal(avro_datum_to_array(a),
-				   avro_datum_to_array(b));
-	case AVRO_MAP:
-		return map_equal(avro_datum_to_map(a), avro_datum_to_map(b));
-	case AVRO_RECORD:
-	case AVRO_ENUM:
-	case AVRO_FIXED:
-	case AVRO_UNION:
-	case AVRO_LINK:
-		/*
-		 * TODO 
-		 */
-		return 0;
-	}
-	return 0;
-}
-
 avro_datum_t avro_string(const char *str)
 {
 	struct avro_string_datum_t *datum =
@@ -257,14 +154,19 @@
 avro_datum_t
 avro_record_field_get(const avro_datum_t datum, const char *field_name)
 {
-	struct avro_record_datum_t *field = NULL;
+	union {
+		avro_datum_t field;
+		st_data_t data;
+	} val;
 	if (is_avro_datum(datum) && is_avro_record(datum)) {
 		struct avro_record_datum_t *record =
 		    avro_datum_to_record(datum);
-		st_lookup(record->fields, (st_data_t) field_name,
-			  (st_data_t *) & field);
+		if (st_lookup
+		    (record->fields, (st_data_t) field_name, &(val.data))) {
+			return val.field;
+		}
 	}
-	return &field->obj;
+	return NULL;
 }
 
 int
@@ -392,744 +294,3 @@
 {
 
 }
-
-int
-avro_schema_match(avro_schema_t writers_schema, avro_schema_t readers_schema)
-{
-	if (!is_avro_schema(writers_schema) || !is_avro_schema(readers_schema)) {
-		return 0;
-	}
-
-	switch (avro_typeof(writers_schema)) {
-	case AVRO_UNION:
-		return 1;
-
-	case AVRO_INT:
-		return is_avro_int(readers_schema)
-		    || is_avro_long(readers_schema)
-		    || is_avro_float(readers_schema)
-		    || is_avro_double(readers_schema);
-
-	case AVRO_LONG:
-		return is_avro_long(readers_schema)
-		    || is_avro_float(readers_schema)
-		    || is_avro_double(readers_schema);
-
-	case AVRO_FLOAT:
-		return is_avro_float(readers_schema)
-		    || is_avro_double(readers_schema);
-
-	case AVRO_STRING:
-	case AVRO_BYTES:
-	case AVRO_DOUBLE:
-	case AVRO_BOOLEAN:
-	case AVRO_NULL:
-		return avro_typeof(writers_schema) ==
-		    avro_typeof(readers_schema);
-
-	case AVRO_RECORD:
-		return is_avro_record(readers_schema)
-		    && strcmp(avro_schema_name(writers_schema),
-			      avro_schema_name(readers_schema)) == 0;
-
-	case AVRO_FIXED:
-		return is_avro_fixed(readers_schema)
-		    && strcmp(avro_schema_name(writers_schema),
-			      avro_schema_name(readers_schema)) == 0
-		    && (avro_schema_to_fixed(writers_schema))->size ==
-		    (avro_schema_to_fixed(readers_schema))->size;
-
-	case AVRO_ENUM:
-		return is_avro_enum(readers_schema)
-		    && strcmp(avro_schema_to_enum(writers_schema)->name,
-			      avro_schema_to_enum(readers_schema)->name) == 0;
-
-	case AVRO_MAP:
-		return is_avro_map(readers_schema)
-		    && avro_typeof(avro_schema_to_map(writers_schema)->values)
-		    == avro_typeof(avro_schema_to_map(readers_schema)->values);
-
-	case AVRO_ARRAY:
-		return is_avro_array(readers_schema)
-		    && avro_typeof(avro_schema_to_array(writers_schema)->items)
-		    == avro_typeof(avro_schema_to_array(readers_schema)->items);
-
-	case AVRO_LINK:
-		/*
-		 * TODO 
-		 */
-		break;
-	}
-
-	return 0;
-}
-
-static int
-read_fixed(avro_reader_t reader, const avro_encoding_t * enc,
-	   avro_schema_t writers_schema, avro_schema_t readers_schema,
-	   avro_datum_t * datum)
-{
-	return 1;
-}
-
-static int
-read_enum(avro_reader_t reader, const avro_encoding_t * enc,
-	  avro_schema_t writers_schema, avro_schema_t readers_schema,
-	  avro_datum_t * datum)
-{
-	return 1;
-}
-
-static int
-read_array(avro_reader_t reader, const avro_encoding_t * enc,
-	   struct avro_array_schema_t *writers_schema,
-	   struct avro_array_schema_t *readers_schema, avro_datum_t * datum)
-{
-	int rval;
-	int64_t i;
-	int64_t block_count;
-	int64_t block_size;
-	avro_datum_t array_datum;
-
-	rval = enc->read_long(reader, &block_count);
-	if (rval) {
-		return rval;
-	}
-
-	array_datum = avro_array();
-	while (block_count != 0) {
-		if (block_count < 0) {
-			block_count = block_count * -1;
-			rval = enc->read_long(reader, &block_size);
-			if (rval) {
-				return rval;
-			}
-		}
-
-		for (i = 0; i < block_count; i++) {
-			avro_datum_t datum;
-
-			rval =
-			    avro_read_data(reader, writers_schema->items,
-					   readers_schema->items, &datum);
-			if (rval) {
-				return rval;
-			}
-			rval = avro_array_append_datum(array_datum, datum);
-			if (rval) {
-				avro_datum_decref(array_datum);
-				return rval;
-			}
-		}
-
-		rval = enc->read_long(reader, &block_count);
-		if (rval) {
-			return rval;
-		}
-	}
-	*datum = array_datum;
-	return 0;
-}
-
-static int
-read_map(avro_reader_t reader, const avro_encoding_t * enc,
-	 struct avro_map_schema_t *writers_schema,
-	 struct avro_map_schema_t *readers_schema, avro_datum_t * datum)
-{
-	int rval;
-	int64_t i, block_count;
-	avro_datum_t map = avro_map();
-
-	rval = enc->read_long(reader, &block_count);
-	if (rval) {
-		return rval;
-	}
-	while (block_count != 0) {
-		int64_t block_size;
-		if (block_count < 0) {
-			block_count = block_count * -1;
-			rval = enc->read_long(reader, &block_size);
-			if (rval) {
-				return rval;
-			}
-		}
-		for (i = 0; i < block_count; i++) {
-			char *key;
-			avro_datum_t value;
-			rval = enc->read_string(reader, &key);
-			if (rval) {
-				return rval;
-			}
-			rval =
-			    avro_read_data(reader,
-					   avro_schema_to_map(writers_schema)->
-					   values,
-					   avro_schema_to_map(readers_schema)->
-					   values, &value);
-			if (rval) {
-				return rval;
-			}
-			rval = avro_map_set(map, key, value);
-			if (rval) {
-				return rval;
-			}
-		}
-		rval = enc->read_long(reader, &block_count);
-		if (rval) {
-			return rval;
-		}
-	}
-	*datum = map;
-	return 0;
-}
-
-static int
-read_union(avro_reader_t reader, const avro_encoding_t * enc,
-	   avro_schema_t writers_schema, avro_schema_t readers_schema,
-	   avro_datum_t * datum)
-{
-	return 1;
-}
-
-static int
-read_record(avro_reader_t reader, const avro_encoding_t * enc,
-	    avro_schema_t writers_schema, avro_schema_t readers_schema,
-	    avro_datum_t * datum)
-{
-	return 1;
-}
-
-int
-avro_read_data(avro_reader_t reader, avro_schema_t writers_schema,
-	       avro_schema_t readers_schema, avro_datum_t * datum)
-{
-	int rval = EINVAL;
-	const avro_encoding_t *enc = &avro_binary_encoding;
-
-	if (!reader || !is_avro_schema(writers_schema) || !datum) {
-		return EINVAL;
-	}
-
-	if (readers_schema == NULL) {
-		readers_schema = writers_schema;
-	} else if (!avro_schema_match(writers_schema, readers_schema)) {
-		return EINVAL;
-	}
-
-	/*
-	 * schema resolution 
-	 */
-	if (!is_avro_union(writers_schema) && is_avro_union(readers_schema)) {
-		struct avro_union_branch_t *branch;
-		struct avro_union_schema_t *union_schema =
-		    avro_schema_to_union(readers_schema);
-
-		for (branch = STAILQ_FIRST(&union_schema->branches);
-		     branch != NULL; branch = STAILQ_NEXT(branch, branches)) {
-			if (avro_schema_match(writers_schema, branch->schema)) {
-				return avro_read_data(reader, writers_schema,
-						      branch->schema, datum);
-			}
-		}
-		return EINVAL;
-	}
-
-	switch (avro_typeof(writers_schema)) {
-	case AVRO_NULL:
-		rval = enc->read_null(reader);
-		*datum = avro_null();
-		break;
-
-	case AVRO_BOOLEAN:
-		{
-			int8_t b;
-			rval = enc->read_boolean(reader, &b);
-			*datum = avro_boolean(b);
-		}
-		break;
-
-	case AVRO_STRING:
-		{
-			char *s;
-			rval = enc->read_string(reader, &s);
-			*datum = avro_string(s);
-		}
-		break;
-
-	case AVRO_INT:
-		{
-			int32_t i;
-			rval = enc->read_int(reader, &i);
-			*datum = avro_int(i);
-		}
-		break;
-
-	case AVRO_LONG:
-		{
-			int64_t l;
-			rval = enc->read_long(reader, &l);
-			*datum = avro_long(l);
-		}
-		break;
-
-	case AVRO_FLOAT:
-		{
-			float f;
-			rval = enc->read_float(reader, &f);
-			*datum = avro_float(f);
-		}
-		break;
-
-	case AVRO_DOUBLE:
-		{
-			double d;
-			rval = enc->read_double(reader, &d);
-			*datum = avro_double(d);
-		}
-		break;
-
-	case AVRO_BYTES:
-		{
-			char *bytes;
-			int64_t len;
-			rval = enc->read_bytes(reader, &bytes, &len);
-			*datum = avro_bytes(bytes, len);
-		}
-		break;
-
-	case AVRO_FIXED:
-		rval =
-		    read_fixed(reader, enc, writers_schema, readers_schema,
-			       datum);
-		break;
-
-	case AVRO_ENUM:
-		rval =
-		    read_enum(reader, enc, writers_schema, readers_schema,
-			      datum);
-		break;
-
-	case AVRO_ARRAY:
-		rval =
-		    read_array(reader, enc,
-			       avro_schema_to_array(writers_schema),
-			       avro_schema_to_array(readers_schema), datum);
-		break;
-
-	case AVRO_MAP:
-		rval =
-		    read_map(reader, enc, avro_schema_to_map(writers_schema),
-			     avro_schema_to_map(readers_schema), datum);
-		break;
-
-	case AVRO_UNION:
-		rval =
-		    read_union(reader, enc, writers_schema, readers_schema,
-			       datum);
-		break;
-
-	case AVRO_RECORD:
-		rval =
-		    read_record(reader, enc, writers_schema, readers_schema,
-				datum);
-		break;
-
-	case AVRO_LINK:
-		rval =
-		    avro_read_data(reader,
-				   (avro_schema_to_link(writers_schema))->to,
-				   readers_schema, datum);
-		break;
-	}
-
-	return rval;
-}
-
-struct validate_st {
-	avro_schema_t expected_schema;
-	int rval;
-};
-
-static int
-schema_map_validate_foreach(char *key, avro_datum_t datum,
-			    struct validate_st *vst)
-{
-	if (!avro_schema_datum_validate(vst->expected_schema, datum)) {
-		vst->rval = 0;
-		return ST_STOP;
-	}
-	return ST_CONTINUE;
-}
-
-int
-avro_schema_datum_validate(avro_schema_t expected_schema, avro_datum_t datum)
-{
-	if (!is_avro_schema(expected_schema) || !is_avro_datum(datum)) {
-		return EINVAL;
-	}
-
-	switch (avro_typeof(expected_schema)) {
-	case AVRO_NULL:
-		return is_avro_null(datum);
-
-	case AVRO_BOOLEAN:
-		return is_avro_boolean(datum);
-
-	case AVRO_STRING:
-		return is_avro_string(datum);
-
-	case AVRO_BYTES:
-		return is_avro_bytes(datum);
-
-	case AVRO_INT:
-		return is_avro_int(datum)
-		    || (is_avro_long(datum)
-			&& (INT_MIN <= avro_datum_to_long(datum)->l
-			    && avro_datum_to_long(datum)->l <= INT_MAX));
-
-	case AVRO_LONG:
-		return is_avro_int(datum) || is_avro_long(datum);
-
-	case AVRO_FLOAT:
-		return is_avro_int(datum) || is_avro_long(datum)
-		    || is_avro_float(datum);
-
-	case AVRO_DOUBLE:
-		return is_avro_int(datum) || is_avro_long(datum)
-		    || is_avro_float(datum) || is_avro_double(datum);
-
-	case AVRO_FIXED:
-		return (is_avro_fixed(datum)
-			&& (avro_schema_to_fixed(expected_schema)->size ==
-			    avro_datum_to_fixed(datum)->size));
-
-	case AVRO_ENUM:
-		{
-			struct avro_enum_schema_t *enump =
-			    avro_schema_to_enum(expected_schema);
-			struct avro_enum_symbol_t *symbol =
-			    STAILQ_FIRST(&enump->symbols);
-			while (symbol) {
-				if (!strcmp
-				    (symbol->symbol,
-				     avro_datum_to_enum(datum)->symbol)) {
-					return 1;
-				}
-				symbol = STAILQ_NEXT(symbol, symbols);
-			}
-			return 0;
-		}
-		break;
-
-	case AVRO_ARRAY:
-		{
-			if (is_avro_array(datum)) {
-				struct avro_array_datum_t *array =
-				    avro_datum_to_array(datum);
-				struct avro_array_element_t *el =
-				    STAILQ_FIRST(&array->els);
-				while (el) {
-					if (!avro_schema_datum_validate
-					    ((avro_schema_to_array
-					      (expected_schema))->items,
-					     el->datum)) {
-						return 0;
-					}
-					el = STAILQ_NEXT(el, els);
-				}
-				return 1;
-			}
-			return 0;
-		}
-		break;
-
-	case AVRO_MAP:
-		if (is_avro_map(datum)) {
-			struct validate_st vst =
-			    { avro_schema_to_map(expected_schema)->values, 1 };
-			st_foreach(avro_datum_to_map(datum)->map,
-				   schema_map_validate_foreach,
-				   (st_data_t) & vst);
-			return vst.rval;
-		}
-		break;
-
-	case AVRO_UNION:
-		{
-			struct avro_union_schema_t *union_schema =
-			    avro_schema_to_union(expected_schema);
-			struct avro_union_branch_t *branch;
-
-			for (branch = STAILQ_FIRST(&union_schema->branches);
-			     branch != NULL;
-			     branch = STAILQ_NEXT(branch, branches)) {
-				if (avro_schema_datum_validate
-				    (branch->schema, datum)) {
-					return 1;
-				}
-			}
-			return 0;
-		}
-		break;
-
-	case AVRO_RECORD:
-		if (is_avro_record(datum)) {
-			struct avro_record_schema_t *record_schema =
-			    avro_schema_to_record(expected_schema);
-			struct avro_record_field_t *field;
-			for (field = STAILQ_FIRST(&record_schema->fields);
-			     field != NULL;
-			     field = STAILQ_NEXT(field, fields)) {
-				avro_datum_t field_datum =
-				    avro_record_field_get(datum, field->name);
-				if (!field_datum) {
-					/*
-					 * TODO: check for default values 
-					 */
-					return 0;
-				}
-				if (!avro_schema_datum_validate
-				    (field->type, field_datum)) {
-					return 0;
-				}
-			}
-			return 1;
-		}
-		break;
-
-	case AVRO_LINK:
-		{
-			return
-			    avro_schema_datum_validate((avro_schema_to_link
-							(expected_schema))->to,
-						       datum);
-		}
-		break;
-	}
-	return 0;
-}
-
-static int
-write_record(avro_writer_t writer, const avro_encoding_t * enc,
-	     avro_schema_t writer_schema, avro_datum_t datum)
-{
-	/*
-	 * TODO 
-	 */
-	return EINVAL;
-}
-
-static int
-write_enum(avro_writer_t writer, const avro_encoding_t * enc,
-	   avro_schema_t writer_schema, avro_datum_t datum)
-{
-	/*
-	 * TODO 
-	 */
-	return EINVAL;
-}
-
-static int
-write_fixed(avro_writer_t writer, const avro_encoding_t * enc,
-	    avro_schema_t writer_schema, avro_datum_t datum)
-{
-	/*
-	 * TODO 
-	 */
-	return EINVAL;
-}
-
-struct write_map_args {
-	int rval;
-	avro_writer_t writer;
-	const avro_encoding_t *enc;
-	avro_schema_t values_schema;
-};
-
-static int
-write_map_foreach(char *key, avro_datum_t datum, struct write_map_args *args)
-{
-	int rval = args->enc->write_string(args->writer, key);
-	if (rval) {
-		args->rval = rval;
-		return ST_STOP;
-	}
-	rval = avro_write_data(args->writer, args->values_schema, datum);
-	if (rval) {
-		args->rval = rval;
-		return ST_STOP;
-	}
-	return ST_CONTINUE;
-}
-
-static int
-write_map(avro_writer_t writer, const avro_encoding_t * enc,
-	  struct avro_map_schema_t *writer_schema,
-	  struct avro_map_datum_t *datum)
-{
-	int rval;
-	struct write_map_args args = { 0, writer, enc, writer_schema->values };
-
-	if (datum->map->num_entries) {
-		rval = enc->write_long(writer, datum->map->num_entries);
-		if (rval) {
-			return rval;
-		}
-		st_foreach(datum->map, write_map_foreach, (st_data_t) & args);
-	}
-	if (!args.rval) {
-		rval = enc->write_long(writer, 0);
-		if (rval) {
-			return rval;
-		}
-		return 0;
-	}
-	return args.rval;
-}
-
-static int
-write_array(avro_writer_t writer, const avro_encoding_t * enc,
-	    struct avro_array_schema_t *schema,
-	    struct avro_array_datum_t *array)
-{
-	int rval;
-	struct avro_array_element_t *el;
-
-	if (array->num_elements) {
-		rval = enc->write_long(writer, array->num_elements);
-		if (rval) {
-			return rval;
-		}
-		for (el = STAILQ_FIRST(&array->els);
-		     el != NULL; el = STAILQ_NEXT(el, els)) {
-			rval =
-			    avro_write_data(writer, schema->items, el->datum);
-			if (rval) {
-				return rval;
-			}
-		}
-	}
-	return enc->write_long(writer, 0);
-}
-
-int
-avro_write_data(avro_writer_t writer, avro_schema_t writer_schema,
-		avro_datum_t datum)
-{
-	const avro_encoding_t *enc = &avro_binary_encoding;
-	int rval = -1;
-
-	if (!writer || !(is_avro_schema(writer_schema) && is_avro_datum(datum))) {
-		return EINVAL;
-	}
-	if (!avro_schema_datum_validate(writer_schema, datum)) {
-		return EINVAL;
-	}
-	switch (avro_typeof(writer_schema)) {
-	case AVRO_NULL:
-		rval = enc->write_null(writer);
-		break;
-	case AVRO_BOOLEAN:
-		rval =
-		    enc->write_boolean(writer, avro_datum_to_boolean(datum)->i);
-		break;
-	case AVRO_STRING:
-		rval =
-		    enc->write_string(writer, avro_datum_to_string(datum)->s);
-		break;
-	case AVRO_BYTES:
-		rval =
-		    enc->write_bytes(writer, avro_datum_to_bytes(datum)->bytes,
-				     avro_datum_to_bytes(datum)->size);
-		break;
-	case AVRO_INT:
-		{
-			int32_t i;
-			if (is_avro_int(datum)) {
-				i = avro_datum_to_int(datum)->i;
-			} else if (is_avro_long(datum)) {
-				i = (int32_t) avro_datum_to_long(datum)->l;
-			} else {
-				assert(0
-				       &&
-				       "Serious bug in schema validation code");
-			}
-			rval = enc->write_int(writer, i);
-		}
-		break;
-	case AVRO_LONG:
-		rval = enc->write_long(writer, avro_datum_to_long(datum)->l);
-		break;
-	case AVRO_FLOAT:
-		{
-			float f;
-			if (is_avro_int(datum)) {
-				f = (float)(avro_datum_to_int(datum)->i);
-			} else if (is_avro_long(datum)) {
-				f = (float)(avro_datum_to_long(datum)->l);
-			} else if (is_avro_float(datum)) {
-				f = avro_datum_to_float(datum)->f;
-			} else if (is_avro_double(datum)) {
-				f = (float)(avro_datum_to_double(datum)->d);
-			} else {
-				assert(0
-				       &&
-				       "Serious bug in schema validation code");
-			}
-			rval = enc->write_float(writer, f);
-		}
-		break;
-	case AVRO_DOUBLE:
-		{
-			double d;
-			if (is_avro_int(datum)) {
-				d = (double)(avro_datum_to_int(datum)->i);
-			} else if (is_avro_long(datum)) {
-				d = (double)(avro_datum_to_long(datum)->l);
-			} else if (is_avro_float(datum)) {
-				d = (double)(avro_datum_to_float(datum)->f);
-			} else if (is_avro_double(datum)) {
-				d = avro_datum_to_double(datum)->d;
-			} else {
-				assert(0 && "Bug in schema validation code");
-			}
-			rval = enc->write_double(writer, d);
-		}
-		break;
-
-	case AVRO_RECORD:
-		rval = write_record(writer, enc, writer_schema, datum);
-		break;
-	case AVRO_ENUM:
-		rval = write_enum(writer, enc, writer_schema, datum);
-		break;
-	case AVRO_FIXED:
-		rval = write_fixed(writer, enc, writer_schema, datum);
-		break;
-	case AVRO_MAP:
-		rval =
-		    write_map(writer, enc, avro_schema_to_map(writer_schema),
-			      avro_datum_to_map(datum));
-		break;
-	case AVRO_ARRAY:
-		rval =
-		    write_array(writer, enc,
-				avro_schema_to_array(writer_schema),
-				avro_datum_to_array(datum));
-		break;
-
-	case AVRO_UNION:
-		{
-			assert(0 && "Bug in schema validation code");
-		}
-		break;
-
-	case AVRO_LINK:
-		rval =
-		    avro_write_data(writer,
-				    (avro_schema_to_link(writer_schema))->to,
-				    datum);
-		break;
-	}
-	return rval;
-}

Added: hadoop/avro/trunk/lang/c/src/datum_equal.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_equal.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_equal.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_equal.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0 
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License. 
+ */
+
+#include <string.h>
+#include "datum.h"
+
+/*
+ * Compare two array datums element by element, in list order.
+ * Returns 1 when both hold the same number of elements and each pair
+ * of corresponding elements satisfies avro_datum_equal(), otherwise 0.
+ */
+static int
+array_equal(struct avro_array_datum_t *a, struct avro_array_datum_t *b)
+{
+	struct avro_array_element_t *a_el, *b_el;
+	if (a->num_elements != b->num_elements) {
+		return 0;
+	}
+	/* Walk both lists in lockstep until both are exhausted. */
+	for (a_el = STAILQ_FIRST(&a->els),
+	     b_el = STAILQ_FIRST(&b->els);
+	     !(a_el == NULL && b_el == NULL);
+	     a_el = STAILQ_NEXT(a_el, els), b_el = STAILQ_NEXT(b_el, els)) {
+		if (a_el == NULL || b_el == NULL) {
+			return 0;	/* different number of elements */
+		}
+		if (!avro_datum_equal(a_el->datum, b_el->datum)) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * State shared with st_equal_foreach() while walking one table:
+ * rval is cleared to 0 on the first mismatch, st is the other table
+ * whose entries are looked up by key.
+ */
+struct st_equal_args {
+	int rval;
+	st_table *st;
+};
+
+/*
+ * st_foreach() callback: look up key in args->st and compare the datum
+ * stored there with datum.  On a missing key or unequal datum, records
+ * the failure in args->rval and stops the walk.
+ */
+static int
+st_equal_foreach(char *key, avro_datum_t datum, struct st_equal_args *args)
+{
+	union {
+		avro_datum_t datum_other;
+		st_data_t data;
+	} val;
+	if (!st_lookup(args->st, (st_data_t) key, &(val.data))) {
+		args->rval = 0;
+		return ST_STOP;
+	}
+	if (!avro_datum_equal(datum, val.datum_other)) {
+		args->rval = 0;
+		return ST_STOP;
+	}
+	return ST_CONTINUE;
+}
+
+/*
+ * Two maps are equal when they have the same number of entries and every
+ * key of a maps to an equal datum in b.
+ */
+static int map_equal(struct avro_map_datum_t *a, struct avro_map_datum_t *b)
+{
+	struct st_equal_args args = { 1, b->map };
+	if (a->map->num_entries != b->map->num_entries) {
+		return 0;
+	}
+	st_foreach(a->map, st_equal_foreach, (st_data_t) & args);
+	return args.rval;
+}
+
+/*
+ * Two records are equal when they have the same number of fields and
+ * every field of a has an equal datum under the same name in b.
+ */
+static int record_equal(struct avro_record_datum_t *a,
+			struct avro_record_datum_t *b)
+{
+	struct st_equal_args args = { 1, b->fields };
+	if (a->fields->num_entries != b->fields->num_entries) {
+		return 0;
+	}
+	st_foreach(a->fields, st_equal_foreach, (st_data_t) & args);
+	return args.rval;
+}
+
+/*
+ * Structural equality of two datums.  Returns 1 when a and b both pass
+ * is_avro_datum(), have the same type and hold equal contents, otherwise
+ * 0.  Enum, fixed, union and link datums are not compared yet: always 0.
+ */
+int avro_datum_equal(avro_datum_t a, avro_datum_t b)
+{
+	if (!(is_avro_datum(a) && is_avro_datum(b))) {
+		return 0;
+	}
+	if (avro_typeof(a) != avro_typeof(b)) {
+		return 0;
+	}
+	switch (avro_typeof(a)) {
+	case AVRO_STRING:
+		return strcmp(avro_datum_to_string(a)->s,
+			      avro_datum_to_string(b)->s) == 0;
+	case AVRO_BYTES:
+		return (avro_datum_to_bytes(a)->size ==
+			avro_datum_to_bytes(b)->size)
+		    && memcmp(avro_datum_to_bytes(a)->bytes,
+			      avro_datum_to_bytes(b)->bytes,
+			      avro_datum_to_bytes(a)->size) == 0;
+	case AVRO_INT:
+		return avro_datum_to_int(a)->i == avro_datum_to_int(b)->i;
+	case AVRO_LONG:
+		return avro_datum_to_long(a)->l == avro_datum_to_long(b)->l;
+	case AVRO_FLOAT:
+		return avro_datum_to_float(a)->f == avro_datum_to_float(b)->f;
+	case AVRO_DOUBLE:
+		return avro_datum_to_double(a)->d == avro_datum_to_double(b)->d;
+	case AVRO_BOOLEAN:
+		return avro_datum_to_boolean(a)->i ==
+		    avro_datum_to_boolean(b)->i;
+	case AVRO_NULL:
+		return 1;
+	case AVRO_ARRAY:
+		return array_equal(avro_datum_to_array(a),
+				   avro_datum_to_array(b));
+	case AVRO_MAP:
+		return map_equal(avro_datum_to_map(a), avro_datum_to_map(b));
+
+	case AVRO_RECORD:
+		return record_equal(avro_datum_to_record(a),
+				    avro_datum_to_record(b));
+
+	case AVRO_ENUM:
+	case AVRO_FIXED:
+	case AVRO_UNION:
+	case AVRO_LINK:
+		/*
+		 * TODO 
+		 */
+		return 0;
+	}
+	return 0;
+}

Added: hadoop/avro/trunk/lang/c/src/datum_read.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_read.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_read.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_read.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,409 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0 
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License. 
+ */
+#include <errno.h>
+#include <string.h>
+#include "encoding.h"
+#include "schema.h"
+#include "datum.h"
+
+int	/* Returns non-zero when data written with writers_schema may be read with readers_schema, 0 otherwise. */
+avro_schema_match(avro_schema_t writers_schema, avro_schema_t readers_schema)
+{
+	if (!is_avro_schema(writers_schema) || !is_avro_schema(readers_schema)) {
+		return 0;
+	}
+
+	switch (avro_typeof(writers_schema)) {
+	case AVRO_UNION:
+		return 1;	/* a writer union is accepted without inspecting its branches */
+
+	case AVRO_INT:	/* an int writer is accepted by int, long, float and double readers */
+		return is_avro_int(readers_schema)
+		    || is_avro_long(readers_schema)
+		    || is_avro_float(readers_schema)
+		    || is_avro_double(readers_schema);
+
+	case AVRO_LONG:
+		return is_avro_long(readers_schema)
+		    || is_avro_float(readers_schema)
+		    || is_avro_double(readers_schema);
+
+	case AVRO_FLOAT:
+		return is_avro_float(readers_schema)
+		    || is_avro_double(readers_schema);
+
+	case AVRO_STRING:
+	case AVRO_BYTES:
+	case AVRO_DOUBLE:
+	case AVRO_BOOLEAN:
+	case AVRO_NULL:	/* these types only match a reader of the identical type */
+		return avro_typeof(writers_schema) ==
+		    avro_typeof(readers_schema);
+
+	case AVRO_RECORD:	/* matched by name only; fields are not compared here */
+		return is_avro_record(readers_schema)
+		    && strcmp(avro_schema_name(writers_schema),
+			      avro_schema_name(readers_schema)) == 0;
+
+	case AVRO_FIXED:	/* name and size must both agree */
+		return is_avro_fixed(readers_schema)
+		    && strcmp(avro_schema_name(writers_schema),
+			      avro_schema_name(readers_schema)) == 0
+		    && (avro_schema_to_fixed(writers_schema))->size ==
+		    (avro_schema_to_fixed(readers_schema))->size;
+
+	case AVRO_ENUM:	/* matched by name only; symbols are not compared here */
+		return is_avro_enum(readers_schema)
+		    && strcmp(avro_schema_to_enum(writers_schema)->name,
+			      avro_schema_to_enum(readers_schema)->name) == 0;
+
+	case AVRO_MAP:	/* only the top-level type of the values schema is compared */
+		return is_avro_map(readers_schema)
+		    && avro_typeof(avro_schema_to_map(writers_schema)->values)
+		    == avro_typeof(avro_schema_to_map(readers_schema)->values);
+
+	case AVRO_ARRAY:	/* only the top-level type of the items schema is compared */
+		return is_avro_array(readers_schema)
+		    && avro_typeof(avro_schema_to_array(writers_schema)->items)
+		    == avro_typeof(avro_schema_to_array(readers_schema)->items);
+
+	case AVRO_LINK:
+		/*
+		 * TODO 
+		 */
+		break;	/* not implemented: falls out of the switch to return 0 */
+	}
+
+	return 0;
+}
+
+static int	/* Placeholder for decoding a fixed value: reads nothing, leaves *datum untouched and always returns 1. */
+read_fixed(avro_reader_t reader, const avro_encoding_t * enc,
+	   avro_schema_t writers_schema, avro_schema_t readers_schema,
+	   avro_datum_t * datum)
+{
+	return 1;
+}
+
+static int	/* Placeholder for decoding an enum value: reads nothing, leaves *datum untouched and always returns 1. */
+read_enum(avro_reader_t reader, const avro_encoding_t * enc,
+	  avro_schema_t writers_schema, avro_schema_t readers_schema,
+	  avro_datum_t * datum)
+{
+	return 1;
+}
+
+static int
+read_array(avro_reader_t reader, const avro_encoding_t * enc,
+	   struct avro_array_schema_t *writers_schema,
+	   struct avro_array_schema_t *readers_schema, avro_datum_t * datum)
+{
+	int rval;
+	int64_t i;
+	int64_t block_count;
+	int64_t block_size;
+	avro_datum_t array_datum;
+
+	rval = enc->read_long(reader, &block_count);
+	if (rval) {
+		return rval;
+	}
+
+	array_datum = avro_array();
+	while (block_count != 0) {
+		if (block_count < 0) {
+			block_count = block_count * -1;
+			rval = enc->read_long(reader, &block_size);
+			if (rval) {
+				return rval;
+			}
+		}
+
+		for (i = 0; i < block_count; i++) {
+			avro_datum_t datum;
+
+			rval =
+			    avro_read_data(reader, writers_schema->items,
+					   readers_schema->items, &datum);
+			if (rval) {
+				return rval;
+			}
+			rval = avro_array_append_datum(array_datum, datum);
+			if (rval) {
+				avro_datum_decref(array_datum);
+				return rval;
+			}
+		}
+
+		rval = enc->read_long(reader, &block_count);
+		if (rval) {
+			return rval;
+		}
+	}
+	*datum = array_datum;
+	return 0;
+}
+
+static int
+read_map(avro_reader_t reader, const avro_encoding_t * enc,
+	 struct avro_map_schema_t *writers_schema,
+	 struct avro_map_schema_t *readers_schema, avro_datum_t * datum)
+{
+	int rval;
+	int64_t i, block_count;
+	avro_datum_t map = avro_map();
+
+	rval = enc->read_long(reader, &block_count);
+	if (rval) {
+		return rval;
+	}
+	while (block_count != 0) {
+		int64_t block_size;
+		if (block_count < 0) {
+			block_count = block_count * -1;
+			rval = enc->read_long(reader, &block_size);
+			if (rval) {
+				return rval;
+			}
+		}
+		for (i = 0; i < block_count; i++) {
+			char *key;
+			avro_datum_t value;
+			rval = enc->read_string(reader, &key);
+			if (rval) {
+				return rval;
+			}
+			rval =
+			    avro_read_data(reader,
+					   avro_schema_to_map(writers_schema)->
+					   values,
+					   avro_schema_to_map(readers_schema)->
+					   values, &value);
+			if (rval) {
+				return rval;
+			}
+			rval = avro_map_set(map, key, value);
+			if (rval) {
+				return rval;
+			}
+		}
+		rval = enc->read_long(reader, &block_count);
+		if (rval) {
+			return rval;
+		}
+	}
+	*datum = map;
+	return 0;
+}
+
+static int	/* Placeholder for decoding a union value: reads nothing, leaves *datum untouched and always returns 1. */
+read_union(avro_reader_t reader, const avro_encoding_t * enc,
+	   avro_schema_t writers_schema, avro_schema_t readers_schema,
+	   avro_datum_t * datum)
+{
+	return 1;
+}
+
+/* TODO: handle default values in fields */
+static int	/* Decodes every writer field into a new record datum; *datum is set only on success, returns 0 or a non-zero error. */
+read_record(avro_reader_t reader, const avro_encoding_t * enc,
+	    struct avro_record_schema_t *writers_schema,
+	    struct avro_record_schema_t *readers_schema, avro_datum_t * datum)
+{
+	int rval;
+	struct avro_record_field_t *reader_field;
+	struct avro_record_field_t *field;
+	avro_datum_t record;
+	avro_datum_t field_datum;
+
+	record = avro_record(writers_schema->name);
+	for (field = STAILQ_FIRST(&writers_schema->fields);
+	     field != NULL; field = STAILQ_NEXT(field, fields)) {
+		for (reader_field = STAILQ_FIRST(&readers_schema->fields);
+		     reader_field != NULL;
+		     reader_field = STAILQ_NEXT(reader_field, fields)) {
+			if (strcmp(field->name, reader_field->name) == 0) {
+				break;	/* reader_field now points at the same-named reader field */
+			}
+		}
+		if (!reader_field) {
+			rval = -1;	/* TODO: skip_record */
+			goto error;
+		}
+		rval = avro_read_data(reader, field->type, reader_field->type,
+				      &field_datum);
+		if (rval) {
+			goto error;
+		}
+		rval = avro_record_field_set(record, field->name, field_datum);
+		if (rval) {
+			goto error;
+		}
+	}
+	*datum = record;	/* published only once every field has decoded */
+	return 0;
+error:
+	avro_datum_decref(record);	/* drop the partially filled record instead of leaking it */
+	return rval;
+}
+
+int
+avro_read_data(avro_reader_t reader, avro_schema_t writers_schema,
+	       avro_schema_t readers_schema, avro_datum_t * datum)
+{
+	int rval = EINVAL;
+	const avro_encoding_t *enc = &avro_binary_encoding;
+
+	if (!reader || !is_avro_schema(writers_schema) || !datum) {
+		return EINVAL;
+	}
+
+	if (readers_schema == NULL) {
+		readers_schema = writers_schema;
+	} else if (!avro_schema_match(writers_schema, readers_schema)) {
+		return EINVAL;
+	}
+
+	/*
+	 * schema resolution 
+	 */
+	if (!is_avro_union(writers_schema) && is_avro_union(readers_schema)) {
+		struct avro_union_branch_t *branch;
+		struct avro_union_schema_t *union_schema =
+		    avro_schema_to_union(readers_schema);
+
+		for (branch = STAILQ_FIRST(&union_schema->branches);
+		     branch != NULL; branch = STAILQ_NEXT(branch, branches)) {
+			if (avro_schema_match(writers_schema, branch->schema)) {
+				return avro_read_data(reader, writers_schema,
+						      branch->schema, datum);
+			}
+		}
+		return EINVAL;
+	}
+
+	switch (avro_typeof(writers_schema)) {
+	case AVRO_NULL:
+		rval = enc->read_null(reader);
+		*datum = avro_null();
+		break;
+
+	case AVRO_BOOLEAN:
+		{
+			int8_t b;
+			rval = enc->read_boolean(reader, &b);
+			*datum = avro_boolean(b);
+		}
+		break;
+
+	case AVRO_STRING:
+		{
+			char *s;
+			rval = enc->read_string(reader, &s);
+			*datum = avro_string(s);
+		}
+		break;
+
+	case AVRO_INT:
+		{
+			int32_t i;
+			rval = enc->read_int(reader, &i);
+			*datum = avro_int(i);
+		}
+		break;
+
+	case AVRO_LONG:
+		{
+			int64_t l;
+			rval = enc->read_long(reader, &l);
+			*datum = avro_long(l);
+		}
+		break;
+
+	case AVRO_FLOAT:
+		{
+			float f;
+			rval = enc->read_float(reader, &f);
+			*datum = avro_float(f);
+		}
+		break;
+
+	case AVRO_DOUBLE:
+		{
+			double d;
+			rval = enc->read_double(reader, &d);
+			*datum = avro_double(d);
+		}
+		break;
+
+	case AVRO_BYTES:
+		{
+			char *bytes;
+			int64_t len;
+			rval = enc->read_bytes(reader, &bytes, &len);
+			*datum = avro_bytes(bytes, len);
+		}
+		break;
+
+	case AVRO_FIXED:
+		rval =
+		    read_fixed(reader, enc, writers_schema, readers_schema,
+			       datum);
+		break;
+
+	case AVRO_ENUM:
+		rval =
+		    read_enum(reader, enc, writers_schema, readers_schema,
+			      datum);
+		break;
+
+	case AVRO_ARRAY:
+		rval =
+		    read_array(reader, enc,
+			       avro_schema_to_array(writers_schema),
+			       avro_schema_to_array(readers_schema), datum);
+		break;
+
+	case AVRO_MAP:
+		rval =
+		    read_map(reader, enc, avro_schema_to_map(writers_schema),
+			     avro_schema_to_map(readers_schema), datum);
+		break;
+
+	case AVRO_UNION:
+		rval =
+		    read_union(reader, enc, writers_schema, readers_schema,
+			       datum);
+		break;
+
+	case AVRO_RECORD:
+		rval =
+		    read_record(reader, enc,
+				avro_schema_to_record(writers_schema),
+				avro_schema_to_record(readers_schema), datum);
+		break;
+
+	case AVRO_LINK:
+		rval =
+		    avro_read_data(reader,
+				   (avro_schema_to_link(writers_schema))->to,
+				   readers_schema, datum);
+		break;
+	}
+
+	return rval;
+}

Added: hadoop/avro/trunk/lang/c/src/datum_validate.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_validate.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_validate.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_validate.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0 
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License. 
+ */
+#include <limits.h>
+#include <errno.h>
+#include <string.h>
+#include "schema.h"
+#include "datum.h"
+#include "st.h"
+
+struct validate_st {	/* state handed to schema_map_validate_foreach() through st_foreach() */
+	avro_schema_t expected_schema;	/* schema every map value is validated against */
+	int rval;	/* result flag: the callback sets it to 0 when a value fails */
+};
+
+static int	/* st_foreach() callback: validates one map value against vst->expected_schema; key is not examined. */
+schema_map_validate_foreach(char *key, avro_datum_t datum,
+			    struct validate_st *vst)
+{
+	if (!avro_schema_datum_validate(vst->expected_schema, datum)) {
+		vst->rval = 0;	/* record the failure for the caller of st_foreach() */
+		return ST_STOP;	/* first invalid value ends the iteration */
+	}
+	return ST_CONTINUE;
+}
+
+int	/* Returns 1 when datum is acceptable for expected_schema and 0 otherwise, including for invalid arguments. */
+avro_schema_datum_validate(avro_schema_t expected_schema, avro_datum_t datum)
+{
+	if (!is_avro_schema(expected_schema) || !is_avro_datum(datum)) {
+		return 0;	/* was EINVAL, which callers testing !result would read as "valid" */
+	}
+
+	switch (avro_typeof(expected_schema)) {
+	case AVRO_NULL:
+		return is_avro_null(datum);
+
+	case AVRO_BOOLEAN:
+		return is_avro_boolean(datum);
+
+	case AVRO_STRING:
+		return is_avro_string(datum);
+
+	case AVRO_BYTES:
+		return is_avro_bytes(datum);
+
+	case AVRO_INT:	/* a long datum is accepted when its value fits in an int */
+		return is_avro_int(datum)
+		    || (is_avro_long(datum)
+			&& (INT_MIN <= avro_datum_to_long(datum)->l
+			    && avro_datum_to_long(datum)->l <= INT_MAX));
+
+	case AVRO_LONG:
+		return is_avro_int(datum) || is_avro_long(datum);
+
+	case AVRO_FLOAT:
+		return is_avro_int(datum) || is_avro_long(datum)
+		    || is_avro_float(datum);
+
+	case AVRO_DOUBLE:
+		return is_avro_int(datum) || is_avro_long(datum)
+		    || is_avro_float(datum) || is_avro_double(datum);
+
+	case AVRO_FIXED:
+		return (is_avro_fixed(datum)
+			&& (avro_schema_to_fixed(expected_schema)->size ==
+			    avro_datum_to_fixed(datum)->size));
+
+	case AVRO_ENUM:
+		if (is_avro_enum(datum)) {	/* check the type before reading the datum as an enum */
+			struct avro_enum_schema_t *enump =
+			    avro_schema_to_enum(expected_schema);
+			struct avro_enum_symbol_t *symbol =
+			    STAILQ_FIRST(&enump->symbols);
+			while (symbol) {
+				if (!strcmp
+				    (symbol->symbol,
+				     avro_datum_to_enum(datum)->symbol)) {
+					return 1;
+				}
+				symbol = STAILQ_NEXT(symbol, symbols);
+			}
+			return 0;	/* the datum's symbol is not declared by the schema */
+		}
+		break;
+
+	case AVRO_ARRAY:
+		{
+			if (is_avro_array(datum)) {
+				struct avro_array_datum_t *array =
+				    avro_datum_to_array(datum);
+				struct avro_array_element_t *el =
+				    STAILQ_FIRST(&array->els);
+				while (el) {
+					if (!avro_schema_datum_validate
+					    ((avro_schema_to_array
+					      (expected_schema))->items,
+					     el->datum)) {
+						return 0;
+					}
+					el = STAILQ_NEXT(el, els);
+				}
+				return 1;
+			}
+			return 0;
+		}
+		break;
+
+	case AVRO_MAP:
+		if (is_avro_map(datum)) {
+			struct validate_st vst =
+			    { avro_schema_to_map(expected_schema)->values, 1 };
+			st_foreach(avro_datum_to_map(datum)->map,
+				   schema_map_validate_foreach,
+				   (st_data_t) & vst);
+			return vst.rval;	/* still 1 unless the callback rejected a value */
+		}
+		break;
+
+	case AVRO_UNION:	/* valid when any branch accepts the datum */
+		{
+			struct avro_union_schema_t *union_schema =
+			    avro_schema_to_union(expected_schema);
+			struct avro_union_branch_t *branch;
+
+			for (branch = STAILQ_FIRST(&union_schema->branches);
+			     branch != NULL;
+			     branch = STAILQ_NEXT(branch, branches)) {
+				if (avro_schema_datum_validate
+				    (branch->schema, datum)) {
+					return 1;
+				}
+			}
+			return 0;
+		}
+		break;
+
+	case AVRO_RECORD:	/* every schema field must be present in the datum and valid */
+		if (is_avro_record(datum)) {
+			struct avro_record_schema_t *record_schema =
+			    avro_schema_to_record(expected_schema);
+			struct avro_record_field_t *field;
+			for (field = STAILQ_FIRST(&record_schema->fields);
+			     field != NULL;
+			     field = STAILQ_NEXT(field, fields)) {
+				avro_datum_t field_datum =
+				    avro_record_field_get(datum, field->name);
+				if (!field_datum) {
+					/*
+					 * TODO: check for default values 
+					 */
+					return 0;
+				}
+				if (!avro_schema_datum_validate
+				    (field->type, field_datum)) {
+					return 0;
+				}
+			}
+			return 1;
+		}
+		break;
+
+	case AVRO_LINK:	/* a link is validated as the schema it points to */
+		{
+			return
+			    avro_schema_datum_validate((avro_schema_to_link
+							(expected_schema))->to,
+						       datum);
+		}
+		break;
+	}
+	return 0;
+}

Added: hadoop/avro/trunk/lang/c/src/datum_write.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_write.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_write.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_write.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0 
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License. 
+ */
+#include <errno.h>
+#include <assert.h>
+#include "schema.h"
+#include "datum.h"
+#include "encoding.h"
+
+static int	/* Writes each field of the record schema in schema order, looking its value up in datum by name; returns 0 or the first non-zero error. */
+write_record(avro_writer_t writer, const avro_encoding_t * enc,
+	     struct avro_record_schema_t *record, avro_datum_t datum)
+{
+	int rval;
+	struct avro_record_field_t *field = STAILQ_FIRST(&record->fields);
+	for (; field != NULL; field = STAILQ_NEXT(field, fields)) {
+		rval = avro_write_data(writer, field->type,
+				       avro_record_field_get(datum,
+							     field->name));
+		if (rval) {
+			return rval;
+		}
+	}
+	return 0;
+}
+
+static int	/* Placeholder for encoding an enum value: writes nothing and always returns EINVAL. */
+write_enum(avro_writer_t writer, const avro_encoding_t * enc,
+	   avro_schema_t writer_schema, avro_datum_t datum)
+{
+	/*
+	 * TODO 
+	 */
+	return EINVAL;
+}
+
+static int	/* Placeholder for encoding a fixed value: writes nothing and always returns EINVAL. */
+write_fixed(avro_writer_t writer, const avro_encoding_t * enc,
+	    avro_schema_t writer_schema, avro_datum_t datum)
+{
+	/*
+	 * TODO 
+	 */
+	return EINVAL;
+}
+
+struct write_map_args {	/* state handed to write_map_foreach() through st_foreach() */
+	int rval;	/* first non-zero error hit by the callback; 0 while all writes succeed */
+	avro_writer_t writer;	/* destination of the encoded entries */
+	const avro_encoding_t *enc;	/* encoding used to write each key */
+	avro_schema_t values_schema;	/* schema every map value is written with */
+};
+
+static int	/* st_foreach() callback: writes one map entry as its key string followed by its value. */
+write_map_foreach(char *key, avro_datum_t datum, struct write_map_args *args)
+{
+	int rval = args->enc->write_string(args->writer, key);
+	if (rval) {
+		args->rval = rval;	/* hand the error back to write_map() */
+		return ST_STOP;
+	}
+	rval = avro_write_data(args->writer, args->values_schema, datum);
+	if (rval) {
+		args->rval = rval;
+		return ST_STOP;	/* stop iterating after the first failed write */
+	}
+	return ST_CONTINUE;
+}
+
+static int	/* Writes the map as at most one block (entry count, then key/value pairs) followed by a terminating 0; returns 0 or the first non-zero error. */
+write_map(avro_writer_t writer, const avro_encoding_t * enc,
+	  struct avro_map_schema_t *writer_schema,
+	  struct avro_map_datum_t *datum)
+{
+	int rval;
+	struct write_map_args args = { 0, writer, enc, writer_schema->values };
+
+	if (datum->map->num_entries) {	/* an empty map writes only the terminating 0 */
+		rval = enc->write_long(writer, datum->map->num_entries);
+		if (rval) {
+			return rval;
+		}
+		st_foreach(datum->map, write_map_foreach, (st_data_t) & args);
+	}
+	if (!args.rval) {	/* the terminator is written only when every entry was written */
+		rval = enc->write_long(writer, 0);
+		if (rval) {
+			return rval;
+		}
+		return 0;
+	}
+	return args.rval;
+}
+
+static int	/* Writes the array as at most one block (element count, then each element) followed by a terminating 0; returns 0 or the first non-zero error. */
+write_array(avro_writer_t writer, const avro_encoding_t * enc,
+	    struct avro_array_schema_t *schema,
+	    struct avro_array_datum_t *array)
+{
+	int rval;
+	struct avro_array_element_t *el;
+
+	if (array->num_elements) {	/* an empty array writes only the terminating 0 */
+		rval = enc->write_long(writer, array->num_elements);
+		if (rval) {
+			return rval;
+		}
+		for (el = STAILQ_FIRST(&array->els);
+		     el != NULL; el = STAILQ_NEXT(el, els)) {
+			rval =
+			    avro_write_data(writer, schema->items, el->datum);
+			if (rval) {
+				return rval;
+			}
+		}
+	}
+	return enc->write_long(writer, 0);
+}
+
+int
+avro_write_data(avro_writer_t writer, avro_schema_t writer_schema,
+		avro_datum_t datum)
+{
+	const avro_encoding_t *enc = &avro_binary_encoding;
+	int rval = -1;
+
+	if (!writer || !(is_avro_schema(writer_schema) && is_avro_datum(datum))) {
+		return EINVAL;
+	}
+	if (!avro_schema_datum_validate(writer_schema, datum)) {
+		return EINVAL;
+	}
+	switch (avro_typeof(writer_schema)) {
+	case AVRO_NULL:
+		rval = enc->write_null(writer);
+		break;
+	case AVRO_BOOLEAN:
+		rval =
+		    enc->write_boolean(writer, avro_datum_to_boolean(datum)->i);
+		break;
+	case AVRO_STRING:
+		rval =
+		    enc->write_string(writer, avro_datum_to_string(datum)->s);
+		break;
+	case AVRO_BYTES:
+		rval =
+		    enc->write_bytes(writer, avro_datum_to_bytes(datum)->bytes,
+				     avro_datum_to_bytes(datum)->size);
+		break;
+	case AVRO_INT:
+		{
+			int32_t i;
+			if (is_avro_int(datum)) {
+				i = avro_datum_to_int(datum)->i;
+			} else if (is_avro_long(datum)) {
+				i = (int32_t) avro_datum_to_long(datum)->l;
+			} else {
+				assert(0
+				       &&
+				       "Serious bug in schema validation code");
+			}
+			rval = enc->write_int(writer, i);
+		}
+		break;
+	case AVRO_LONG:
+		rval = enc->write_long(writer, avro_datum_to_long(datum)->l);
+		break;
+	case AVRO_FLOAT:
+		{
+			float f;
+			if (is_avro_int(datum)) {
+				f = (float)(avro_datum_to_int(datum)->i);
+			} else if (is_avro_long(datum)) {
+				f = (float)(avro_datum_to_long(datum)->l);
+			} else if (is_avro_float(datum)) {
+				f = avro_datum_to_float(datum)->f;
+			} else if (is_avro_double(datum)) {
+				f = (float)(avro_datum_to_double(datum)->d);
+			} else {
+				assert(0
+				       &&
+				       "Serious bug in schema validation code");
+			}
+			rval = enc->write_float(writer, f);
+		}
+		break;
+	case AVRO_DOUBLE:
+		{
+			double d;
+			if (is_avro_int(datum)) {
+				d = (double)(avro_datum_to_int(datum)->i);
+			} else if (is_avro_long(datum)) {
+				d = (double)(avro_datum_to_long(datum)->l);
+			} else if (is_avro_float(datum)) {
+				d = (double)(avro_datum_to_float(datum)->f);
+			} else if (is_avro_double(datum)) {
+				d = avro_datum_to_double(datum)->d;
+			} else {
+				assert(0 && "Bug in schema validation code");
+			}
+			rval = enc->write_double(writer, d);
+		}
+		break;
+
+	case AVRO_RECORD:
+		rval =
+		    write_record(writer, enc,
+				 avro_schema_to_record(writer_schema), datum);
+		break;
+	case AVRO_ENUM:
+		rval = write_enum(writer, enc, writer_schema, datum);
+		break;
+	case AVRO_FIXED:
+		rval = write_fixed(writer, enc, writer_schema, datum);
+		break;
+	case AVRO_MAP:
+		rval =
+		    write_map(writer, enc, avro_schema_to_map(writer_schema),
+			      avro_datum_to_map(datum));
+		break;
+	case AVRO_ARRAY:
+		rval =
+		    write_array(writer, enc,
+				avro_schema_to_array(writer_schema),
+				avro_datum_to_array(datum));
+		break;
+
+	case AVRO_UNION:
+		{
+			assert(0 && "Bug in schema validation code");
+		}
+		break;
+
+	case AVRO_LINK:
+		rval =
+		    avro_write_data(writer,
+				    (avro_schema_to_link(writer_schema))->to,
+				    datum);
+		break;
+	}
+	return rval;
+}

Modified: hadoop/avro/trunk/lang/c/src/io.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/io.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/io.c (original)
+++ hadoop/avro/trunk/lang/c/src/io.c Fri Jan 22 00:38:01 2010
@@ -21,6 +21,7 @@
 #include <string.h>
 #include "avro.h"
 #include "container_of.h"
+#include "dump.h"
 
 enum avro_io_type_t {
 	AVRO_FILE_IO,
@@ -215,6 +216,22 @@
 	return -1;
 }
 
+void avro_writer_dump(avro_writer_t writer, FILE * fp)	/* Passes a memory writer's buffer and its `written` count to dump() on fp. */
+{
+	if (is_memory_io(writer)) {	/* any other kind of writer is silently ignored */
+		dump(fp, (char *)avro_writer_to_memory(writer)->buf,
+		     avro_writer_to_memory(writer)->written);
+	}
+}
+
+void avro_reader_dump(avro_reader_t reader, FILE * fp)	/* Passes a memory reader's buffer and its `read` count to dump() on fp. */
+{
+	if (is_memory_io(reader)) {	/* any other kind of reader is silently ignored */
+		dump(fp, (char *)avro_reader_to_memory(reader)->buf,
+		     avro_reader_to_memory(reader)->read);
+	}
+}
+
 void avro_reader_free(avro_reader_t reader)
 {
 	if (is_memory_io(reader)) {

Modified: hadoop/avro/trunk/lang/c/src/schema.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/schema.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/schema.c (original)
+++ hadoop/avro/trunk/lang/c/src/schema.c Fri Jan 22 00:38:01 2010
@@ -307,9 +307,12 @@
 find_named_schemas(const char *name, avro_schema_error_t * error)
 {
 	st_table *st = (*error)->named_schemas;
-	avro_schema_t schema;
-	if (st_lookup(st, (st_data_t) name, (st_data_t *) & schema)) {
-		return schema;
+	union {
+		avro_schema_t schema;
+		st_data_t data;
+	} val;
+	if (st_lookup(st, (st_data_t) name, &(val.data))) {
+		return val.schema;
 	}
 	return NULL;
 };

Modified: hadoop/avro/trunk/lang/c/src/schema.h
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/schema.h?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/schema.h (original)
+++ hadoop/avro/trunk/lang/c/src/schema.h Fri Jan 22 00:38:01 2010
@@ -33,6 +33,7 @@
 struct avro_record_schema_t {
 	struct avro_obj_t obj;
 	char *name;
+	/* TODO: st_table of names for faster lookup in read_record() */
 	 STAILQ_HEAD(fields, avro_record_field_t) fields;
 };
 

Modified: hadoop/avro/trunk/lang/c/tests/test_avro_data.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/test_avro_data.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/test_avro_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/test_avro_data.c Fri Jan 22 00:38:01 2010
@@ -46,7 +46,7 @@
 		fprintf(stderr, "Unable to encode/decode %s\n", type);
 		exit(EXIT_FAILURE);
 	}
-
+	avro_reader_dump(reader, stderr);
 	avro_datum_decref(datum_out);
 	avro_reader_free(reader);
 	avro_writer_free(writer);
@@ -104,7 +104,7 @@
 	return 0;
 }
 
-static test_double(void)
+static int test_double(void)
 {
 	int i;
 	for (i = 0; i < 100; i++) {
@@ -131,9 +131,9 @@
 static int test_boolean(void)
 {
 	int i;
-	for (i = 0; i < 100; i++) {
+	for (i = 0; i <= 1; i++) {
 		avro_schema_t schema = avro_schema_boolean();
-		avro_datum_t datum = avro_boolean(rand() % 2);
+		avro_datum_t datum = avro_boolean(i);
 		write_read_check(schema, NULL, datum, "boolean");
 		avro_datum_decref(datum);
 	}
@@ -149,15 +149,22 @@
 	return 0;
 }
 
-int test_record(void)
+static int test_record(void)
 {
-	/*
-	 * TODO 
-	 */
+	avro_schema_t schema = avro_schema_record("person");
+	avro_datum_t datum = avro_record("person");
+
+	avro_schema_record_field_append(schema, "name", avro_schema_string());
+	avro_schema_record_field_append(schema, "age", avro_schema_int());
+
+	avro_record_field_set(datum, "name", avro_string("Joseph Campbell"));
+	avro_record_field_set(datum, "age", avro_int(83));
+
+	write_read_check(schema, NULL, datum, "record");
 	return 0;
 }
 
-int test_enum(void)
+static int test_enum(void)
 {
 	/*
 	 * TODO 
@@ -165,7 +172,7 @@
 	return 0;
 }
 
-int test_array(void)
+static int test_array(void)
 {
 	int i, rval;
 	avro_schema_t schema = avro_schema_array(avro_schema_int());
@@ -182,7 +189,7 @@
 	return 0;
 }
 
-int test_map(void)
+static int test_map(void)
 {
 	avro_schema_t schema = avro_schema_map(avro_schema_long());
 	avro_datum_t datum = avro_map();
@@ -198,7 +205,7 @@
 	return 0;
 }
 
-int test_union(void)
+static int test_union(void)
 {
 	/*
 	 * TODO 
@@ -206,7 +213,7 @@
 	return 0;
 }
 
-int test_fixed(void)
+static int test_fixed(void)
 {
 	/*
 	 * TODO 
@@ -240,12 +247,10 @@
 	srandom(time(NULL));
 	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
 		struct avro_tests *test = tests + i;
-		fprintf(stderr, "Running %s tests...\n", test->name);
+		fprintf(stderr, "**** Running %s tests ****\n", test->name);
 		if (test->func() != 0) {
-			fprintf(stderr, "failed!\n");
 			return EXIT_FAILURE;
 		}
-		fprintf(stderr, "\t... %s tests passed!\n", test->name);
 	}
 	return EXIT_SUCCESS;
 }

Modified: hadoop/avro/trunk/lang/c/version.sh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/version.sh?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/version.sh (original)
+++ hadoop/avro/trunk/lang/c/version.sh Fri Jan 22 00:38:01 2010
@@ -18,9 +18,9 @@
 #         libavro_binary_age = 0
 #         libavro_interface_age = 0
 #
-libavro_micro_version=6
+libavro_micro_version=7
 libavro_interface_age=0
-libavro_binary_age=1
+libavro_binary_age=2
 
 # IGNORE EVERYTHING ELSE FROM HERE DOWN.........
 if test $# != 1; then