You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ma...@apache.org on 2010/01/22 01:38:03 UTC
svn commit: r901950 - in /hadoop/avro/trunk: ./ lang/c/ lang/c/src/
lang/c/tests/
Author: massie
Date: Fri Jan 22 00:38:01 2010
New Revision: 901950
URL: http://svn.apache.org/viewvc?rev=901950&view=rev
Log:
AVRO-364. Add support for encoding/decoding records
Added:
hadoop/avro/trunk/lang/c/src/datum_equal.c
hadoop/avro/trunk/lang/c/src/datum_read.c
hadoop/avro/trunk/lang/c/src/datum_validate.c
hadoop/avro/trunk/lang/c/src/datum_write.c
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/lang/c/src/Makefile.am
hadoop/avro/trunk/lang/c/src/avro.h
hadoop/avro/trunk/lang/c/src/datum.c
hadoop/avro/trunk/lang/c/src/io.c
hadoop/avro/trunk/lang/c/src/schema.c
hadoop/avro/trunk/lang/c/src/schema.h
hadoop/avro/trunk/lang/c/tests/test_avro_data.c
hadoop/avro/trunk/lang/c/version.sh
Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Fri Jan 22 00:38:01 2010
@@ -238,6 +238,8 @@
AVRO-362. Add test to ensure Python implementation handles Union schema
with two fixed types of different names (hammer)
+ AVRO-364. Add support for encoding/decoding records (massie)
+
OPTIMIZATIONS
AVRO-172. More efficient schema processing (massie)
Modified: hadoop/avro/trunk/lang/c/src/Makefile.am
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/Makefile.am?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/Makefile.am (original)
+++ hadoop/avro/trunk/lang/c/src/Makefile.am Fri Jan 22 00:38:01 2010
@@ -6,8 +6,10 @@
include_HEADERS = avro.h
lib_LTLIBRARIES = libavro.la
-libavro_la_SOURCES = st.c st.h schema.c schema.h schema_printf.c schema_equal.c datum.c datum.h \
-io.c dump.c dump.h encoding_binary.c container_of.h queue.h encoding.h
+libavro_la_SOURCES = st.c st.h schema.c schema.h schema_printf.c schema_equal.c \
+datum.c datum_equal.c datum_validate.c datum_read.c datum_write.c datum.h \
+io.c dump.c dump.h encoding_binary.c \
+container_of.h queue.h encoding.h
libavro_la_LIBADD = $(top_builddir)/jansson/src/.libs/libjansson.a
libavro_la_LDFLAGS = \
-version-info $(LIBAVRO_VERSION) \
Modified: hadoop/avro/trunk/lang/c/src/avro.h
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/avro.h?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/avro.h (original)
+++ hadoop/avro/trunk/lang/c/src/avro.h Fri Jan 22 00:38:01 2010
@@ -154,6 +154,9 @@
int avro_write(avro_writer_t writer, void *buf, int64_t len);
int avro_flush(avro_writer_t writer);
+void avro_writer_dump(avro_writer_t writer, FILE * fp);
+void avro_reader_dump(avro_reader_t reader, FILE * fp);
+
void avro_reader_free(avro_reader_t reader);
void avro_writer_free(avro_writer_t writer);
Modified: hadoop/avro/trunk/lang/c/src/datum.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum.c Fri Jan 22 00:38:01 2010
@@ -31,109 +31,6 @@
datum->refcount = 1;
}
-static int
-array_equal(struct avro_array_datum_t *a, struct avro_array_datum_t *b)
-{
- struct avro_array_element_t *a_el, *b_el;
- if (a->num_elements != b->num_elements) {
- return 0;
- }
- for (a_el = STAILQ_FIRST(&a->els),
- b_el = STAILQ_FIRST(&b->els);
- !(a_el == NULL && a_el == NULL);
- a_el = STAILQ_NEXT(a_el, els), b_el = STAILQ_NEXT(b_el, els)) {
- if (a_el == NULL || b_el == NULL) {
- return 0; /* different number of elements */
- }
- if (!avro_datum_equal(a_el->datum, b_el->datum)) {
- return 0;
- }
- }
- return 1;
-}
-
-struct map_equal_args {
- int rval;
- st_table *st;
-};
-
-static int
-map_equal_foreach(char *key, avro_datum_t datum, struct map_equal_args *args)
-{
- avro_datum_t datum_other = NULL;
-
- st_lookup(args->st, (st_data_t) key, (st_data_t *) & datum_other);
- if (!datum_other) {
- args->rval = 0;
- return ST_STOP;
- }
- if (!avro_datum_equal(datum, datum_other)) {
- args->rval = 0;
- return ST_STOP;
- }
- return ST_CONTINUE;
-}
-
-static int map_equal(struct avro_map_datum_t *a, struct avro_map_datum_t *b)
-{
- struct map_equal_args args = { 1, avro_datum_to_map(b)->map };
- if (a->map->num_entries != b->map->num_entries) {
- return 0;
- }
- st_foreach(avro_datum_to_map(a)->map,
- map_equal_foreach, (st_data_t) & args);
- return args.rval;
-}
-
-int avro_datum_equal(avro_datum_t a, avro_datum_t b)
-{
- if (!(is_avro_datum(a) && is_avro_datum(b))) {
- return 0;
- }
- if (avro_typeof(a) != avro_typeof(b)) {
- return 0;
- }
- switch (avro_typeof(a)) {
- case AVRO_STRING:
- return strcmp(avro_datum_to_string(a)->s,
- avro_datum_to_string(b)->s) == 0;
- case AVRO_BYTES:
- return (avro_datum_to_bytes(a)->size ==
- avro_datum_to_bytes(b)->size)
- && memcmp(avro_datum_to_bytes(a)->bytes,
- avro_datum_to_bytes(b)->bytes,
- avro_datum_to_bytes(a)->size) == 0;
- case AVRO_INT:
- return avro_datum_to_int(a)->i == avro_datum_to_int(b)->i;
- case AVRO_LONG:
- return avro_datum_to_long(a)->l == avro_datum_to_long(b)->l;
- case AVRO_FLOAT:
- return avro_datum_to_float(a)->f == avro_datum_to_float(b)->f;
- case AVRO_DOUBLE:
- return avro_datum_to_double(a)->d == avro_datum_to_double(b)->d;
- case AVRO_BOOLEAN:
- return avro_datum_to_boolean(a)->i ==
- avro_datum_to_boolean(b)->i;
- case AVRO_NULL:
- return 1;
- case AVRO_ARRAY:
- return array_equal(avro_datum_to_array(a),
- avro_datum_to_array(b));
- case AVRO_MAP:
- return map_equal(avro_datum_to_map(a), avro_datum_to_map(b));
- case AVRO_RECORD:
- case AVRO_ENUM:
- case AVRO_FIXED:
- case AVRO_UNION:
- case AVRO_LINK:
- /*
- * TODO
- */
- return 0;
- }
- return 0;
-}
-
avro_datum_t avro_string(const char *str)
{
struct avro_string_datum_t *datum =
@@ -257,14 +154,19 @@
avro_datum_t
avro_record_field_get(const avro_datum_t datum, const char *field_name)
{
- struct avro_record_datum_t *field = NULL;
+ union {
+ avro_datum_t field;
+ st_data_t data;
+ } val;
if (is_avro_datum(datum) && is_avro_record(datum)) {
struct avro_record_datum_t *record =
avro_datum_to_record(datum);
- st_lookup(record->fields, (st_data_t) field_name,
- (st_data_t *) & field);
+ if (st_lookup
+ (record->fields, (st_data_t) field_name, &(val.data))) {
+ return val.field;
+ }
}
- return &field->obj;
+ return NULL;
}
int
@@ -392,744 +294,3 @@
{
}
-
-int
-avro_schema_match(avro_schema_t writers_schema, avro_schema_t readers_schema)
-{
- if (!is_avro_schema(writers_schema) || !is_avro_schema(readers_schema)) {
- return 0;
- }
-
- switch (avro_typeof(writers_schema)) {
- case AVRO_UNION:
- return 1;
-
- case AVRO_INT:
- return is_avro_int(readers_schema)
- || is_avro_long(readers_schema)
- || is_avro_float(readers_schema)
- || is_avro_double(readers_schema);
-
- case AVRO_LONG:
- return is_avro_long(readers_schema)
- || is_avro_float(readers_schema)
- || is_avro_double(readers_schema);
-
- case AVRO_FLOAT:
- return is_avro_float(readers_schema)
- || is_avro_double(readers_schema);
-
- case AVRO_STRING:
- case AVRO_BYTES:
- case AVRO_DOUBLE:
- case AVRO_BOOLEAN:
- case AVRO_NULL:
- return avro_typeof(writers_schema) ==
- avro_typeof(readers_schema);
-
- case AVRO_RECORD:
- return is_avro_record(readers_schema)
- && strcmp(avro_schema_name(writers_schema),
- avro_schema_name(readers_schema)) == 0;
-
- case AVRO_FIXED:
- return is_avro_fixed(readers_schema)
- && strcmp(avro_schema_name(writers_schema),
- avro_schema_name(readers_schema)) == 0
- && (avro_schema_to_fixed(writers_schema))->size ==
- (avro_schema_to_fixed(readers_schema))->size;
-
- case AVRO_ENUM:
- return is_avro_enum(readers_schema)
- && strcmp(avro_schema_to_enum(writers_schema)->name,
- avro_schema_to_enum(readers_schema)->name) == 0;
-
- case AVRO_MAP:
- return is_avro_map(readers_schema)
- && avro_typeof(avro_schema_to_map(writers_schema)->values)
- == avro_typeof(avro_schema_to_map(readers_schema)->values);
-
- case AVRO_ARRAY:
- return is_avro_array(readers_schema)
- && avro_typeof(avro_schema_to_array(writers_schema)->items)
- == avro_typeof(avro_schema_to_array(readers_schema)->items);
-
- case AVRO_LINK:
- /*
- * TODO
- */
- break;
- }
-
- return 0;
-}
-
-static int
-read_fixed(avro_reader_t reader, const avro_encoding_t * enc,
- avro_schema_t writers_schema, avro_schema_t readers_schema,
- avro_datum_t * datum)
-{
- return 1;
-}
-
-static int
-read_enum(avro_reader_t reader, const avro_encoding_t * enc,
- avro_schema_t writers_schema, avro_schema_t readers_schema,
- avro_datum_t * datum)
-{
- return 1;
-}
-
-static int
-read_array(avro_reader_t reader, const avro_encoding_t * enc,
- struct avro_array_schema_t *writers_schema,
- struct avro_array_schema_t *readers_schema, avro_datum_t * datum)
-{
- int rval;
- int64_t i;
- int64_t block_count;
- int64_t block_size;
- avro_datum_t array_datum;
-
- rval = enc->read_long(reader, &block_count);
- if (rval) {
- return rval;
- }
-
- array_datum = avro_array();
- while (block_count != 0) {
- if (block_count < 0) {
- block_count = block_count * -1;
- rval = enc->read_long(reader, &block_size);
- if (rval) {
- return rval;
- }
- }
-
- for (i = 0; i < block_count; i++) {
- avro_datum_t datum;
-
- rval =
- avro_read_data(reader, writers_schema->items,
- readers_schema->items, &datum);
- if (rval) {
- return rval;
- }
- rval = avro_array_append_datum(array_datum, datum);
- if (rval) {
- avro_datum_decref(array_datum);
- return rval;
- }
- }
-
- rval = enc->read_long(reader, &block_count);
- if (rval) {
- return rval;
- }
- }
- *datum = array_datum;
- return 0;
-}
-
-static int
-read_map(avro_reader_t reader, const avro_encoding_t * enc,
- struct avro_map_schema_t *writers_schema,
- struct avro_map_schema_t *readers_schema, avro_datum_t * datum)
-{
- int rval;
- int64_t i, block_count;
- avro_datum_t map = avro_map();
-
- rval = enc->read_long(reader, &block_count);
- if (rval) {
- return rval;
- }
- while (block_count != 0) {
- int64_t block_size;
- if (block_count < 0) {
- block_count = block_count * -1;
- rval = enc->read_long(reader, &block_size);
- if (rval) {
- return rval;
- }
- }
- for (i = 0; i < block_count; i++) {
- char *key;
- avro_datum_t value;
- rval = enc->read_string(reader, &key);
- if (rval) {
- return rval;
- }
- rval =
- avro_read_data(reader,
- avro_schema_to_map(writers_schema)->
- values,
- avro_schema_to_map(readers_schema)->
- values, &value);
- if (rval) {
- return rval;
- }
- rval = avro_map_set(map, key, value);
- if (rval) {
- return rval;
- }
- }
- rval = enc->read_long(reader, &block_count);
- if (rval) {
- return rval;
- }
- }
- *datum = map;
- return 0;
-}
-
-static int
-read_union(avro_reader_t reader, const avro_encoding_t * enc,
- avro_schema_t writers_schema, avro_schema_t readers_schema,
- avro_datum_t * datum)
-{
- return 1;
-}
-
-static int
-read_record(avro_reader_t reader, const avro_encoding_t * enc,
- avro_schema_t writers_schema, avro_schema_t readers_schema,
- avro_datum_t * datum)
-{
- return 1;
-}
-
-int
-avro_read_data(avro_reader_t reader, avro_schema_t writers_schema,
- avro_schema_t readers_schema, avro_datum_t * datum)
-{
- int rval = EINVAL;
- const avro_encoding_t *enc = &avro_binary_encoding;
-
- if (!reader || !is_avro_schema(writers_schema) || !datum) {
- return EINVAL;
- }
-
- if (readers_schema == NULL) {
- readers_schema = writers_schema;
- } else if (!avro_schema_match(writers_schema, readers_schema)) {
- return EINVAL;
- }
-
- /*
- * schema resolution
- */
- if (!is_avro_union(writers_schema) && is_avro_union(readers_schema)) {
- struct avro_union_branch_t *branch;
- struct avro_union_schema_t *union_schema =
- avro_schema_to_union(readers_schema);
-
- for (branch = STAILQ_FIRST(&union_schema->branches);
- branch != NULL; branch = STAILQ_NEXT(branch, branches)) {
- if (avro_schema_match(writers_schema, branch->schema)) {
- return avro_read_data(reader, writers_schema,
- branch->schema, datum);
- }
- }
- return EINVAL;
- }
-
- switch (avro_typeof(writers_schema)) {
- case AVRO_NULL:
- rval = enc->read_null(reader);
- *datum = avro_null();
- break;
-
- case AVRO_BOOLEAN:
- {
- int8_t b;
- rval = enc->read_boolean(reader, &b);
- *datum = avro_boolean(b);
- }
- break;
-
- case AVRO_STRING:
- {
- char *s;
- rval = enc->read_string(reader, &s);
- *datum = avro_string(s);
- }
- break;
-
- case AVRO_INT:
- {
- int32_t i;
- rval = enc->read_int(reader, &i);
- *datum = avro_int(i);
- }
- break;
-
- case AVRO_LONG:
- {
- int64_t l;
- rval = enc->read_long(reader, &l);
- *datum = avro_long(l);
- }
- break;
-
- case AVRO_FLOAT:
- {
- float f;
- rval = enc->read_float(reader, &f);
- *datum = avro_float(f);
- }
- break;
-
- case AVRO_DOUBLE:
- {
- double d;
- rval = enc->read_double(reader, &d);
- *datum = avro_double(d);
- }
- break;
-
- case AVRO_BYTES:
- {
- char *bytes;
- int64_t len;
- rval = enc->read_bytes(reader, &bytes, &len);
- *datum = avro_bytes(bytes, len);
- }
- break;
-
- case AVRO_FIXED:
- rval =
- read_fixed(reader, enc, writers_schema, readers_schema,
- datum);
- break;
-
- case AVRO_ENUM:
- rval =
- read_enum(reader, enc, writers_schema, readers_schema,
- datum);
- break;
-
- case AVRO_ARRAY:
- rval =
- read_array(reader, enc,
- avro_schema_to_array(writers_schema),
- avro_schema_to_array(readers_schema), datum);
- break;
-
- case AVRO_MAP:
- rval =
- read_map(reader, enc, avro_schema_to_map(writers_schema),
- avro_schema_to_map(readers_schema), datum);
- break;
-
- case AVRO_UNION:
- rval =
- read_union(reader, enc, writers_schema, readers_schema,
- datum);
- break;
-
- case AVRO_RECORD:
- rval =
- read_record(reader, enc, writers_schema, readers_schema,
- datum);
- break;
-
- case AVRO_LINK:
- rval =
- avro_read_data(reader,
- (avro_schema_to_link(writers_schema))->to,
- readers_schema, datum);
- break;
- }
-
- return rval;
-}
-
-struct validate_st {
- avro_schema_t expected_schema;
- int rval;
-};
-
-static int
-schema_map_validate_foreach(char *key, avro_datum_t datum,
- struct validate_st *vst)
-{
- if (!avro_schema_datum_validate(vst->expected_schema, datum)) {
- vst->rval = 0;
- return ST_STOP;
- }
- return ST_CONTINUE;
-}
-
-int
-avro_schema_datum_validate(avro_schema_t expected_schema, avro_datum_t datum)
-{
- if (!is_avro_schema(expected_schema) || !is_avro_datum(datum)) {
- return EINVAL;
- }
-
- switch (avro_typeof(expected_schema)) {
- case AVRO_NULL:
- return is_avro_null(datum);
-
- case AVRO_BOOLEAN:
- return is_avro_boolean(datum);
-
- case AVRO_STRING:
- return is_avro_string(datum);
-
- case AVRO_BYTES:
- return is_avro_bytes(datum);
-
- case AVRO_INT:
- return is_avro_int(datum)
- || (is_avro_long(datum)
- && (INT_MIN <= avro_datum_to_long(datum)->l
- && avro_datum_to_long(datum)->l <= INT_MAX));
-
- case AVRO_LONG:
- return is_avro_int(datum) || is_avro_long(datum);
-
- case AVRO_FLOAT:
- return is_avro_int(datum) || is_avro_long(datum)
- || is_avro_float(datum);
-
- case AVRO_DOUBLE:
- return is_avro_int(datum) || is_avro_long(datum)
- || is_avro_float(datum) || is_avro_double(datum);
-
- case AVRO_FIXED:
- return (is_avro_fixed(datum)
- && (avro_schema_to_fixed(expected_schema)->size ==
- avro_datum_to_fixed(datum)->size));
-
- case AVRO_ENUM:
- {
- struct avro_enum_schema_t *enump =
- avro_schema_to_enum(expected_schema);
- struct avro_enum_symbol_t *symbol =
- STAILQ_FIRST(&enump->symbols);
- while (symbol) {
- if (!strcmp
- (symbol->symbol,
- avro_datum_to_enum(datum)->symbol)) {
- return 1;
- }
- symbol = STAILQ_NEXT(symbol, symbols);
- }
- return 0;
- }
- break;
-
- case AVRO_ARRAY:
- {
- if (is_avro_array(datum)) {
- struct avro_array_datum_t *array =
- avro_datum_to_array(datum);
- struct avro_array_element_t *el =
- STAILQ_FIRST(&array->els);
- while (el) {
- if (!avro_schema_datum_validate
- ((avro_schema_to_array
- (expected_schema))->items,
- el->datum)) {
- return 0;
- }
- el = STAILQ_NEXT(el, els);
- }
- return 1;
- }
- return 0;
- }
- break;
-
- case AVRO_MAP:
- if (is_avro_map(datum)) {
- struct validate_st vst =
- { avro_schema_to_map(expected_schema)->values, 1 };
- st_foreach(avro_datum_to_map(datum)->map,
- schema_map_validate_foreach,
- (st_data_t) & vst);
- return vst.rval;
- }
- break;
-
- case AVRO_UNION:
- {
- struct avro_union_schema_t *union_schema =
- avro_schema_to_union(expected_schema);
- struct avro_union_branch_t *branch;
-
- for (branch = STAILQ_FIRST(&union_schema->branches);
- branch != NULL;
- branch = STAILQ_NEXT(branch, branches)) {
- if (avro_schema_datum_validate
- (branch->schema, datum)) {
- return 1;
- }
- }
- return 0;
- }
- break;
-
- case AVRO_RECORD:
- if (is_avro_record(datum)) {
- struct avro_record_schema_t *record_schema =
- avro_schema_to_record(expected_schema);
- struct avro_record_field_t *field;
- for (field = STAILQ_FIRST(&record_schema->fields);
- field != NULL;
- field = STAILQ_NEXT(field, fields)) {
- avro_datum_t field_datum =
- avro_record_field_get(datum, field->name);
- if (!field_datum) {
- /*
- * TODO: check for default values
- */
- return 0;
- }
- if (!avro_schema_datum_validate
- (field->type, field_datum)) {
- return 0;
- }
- }
- return 1;
- }
- break;
-
- case AVRO_LINK:
- {
- return
- avro_schema_datum_validate((avro_schema_to_link
- (expected_schema))->to,
- datum);
- }
- break;
- }
- return 0;
-}
-
-static int
-write_record(avro_writer_t writer, const avro_encoding_t * enc,
- avro_schema_t writer_schema, avro_datum_t datum)
-{
- /*
- * TODO
- */
- return EINVAL;
-}
-
-static int
-write_enum(avro_writer_t writer, const avro_encoding_t * enc,
- avro_schema_t writer_schema, avro_datum_t datum)
-{
- /*
- * TODO
- */
- return EINVAL;
-}
-
-static int
-write_fixed(avro_writer_t writer, const avro_encoding_t * enc,
- avro_schema_t writer_schema, avro_datum_t datum)
-{
- /*
- * TODO
- */
- return EINVAL;
-}
-
-struct write_map_args {
- int rval;
- avro_writer_t writer;
- const avro_encoding_t *enc;
- avro_schema_t values_schema;
-};
-
-static int
-write_map_foreach(char *key, avro_datum_t datum, struct write_map_args *args)
-{
- int rval = args->enc->write_string(args->writer, key);
- if (rval) {
- args->rval = rval;
- return ST_STOP;
- }
- rval = avro_write_data(args->writer, args->values_schema, datum);
- if (rval) {
- args->rval = rval;
- return ST_STOP;
- }
- return ST_CONTINUE;
-}
-
-static int
-write_map(avro_writer_t writer, const avro_encoding_t * enc,
- struct avro_map_schema_t *writer_schema,
- struct avro_map_datum_t *datum)
-{
- int rval;
- struct write_map_args args = { 0, writer, enc, writer_schema->values };
-
- if (datum->map->num_entries) {
- rval = enc->write_long(writer, datum->map->num_entries);
- if (rval) {
- return rval;
- }
- st_foreach(datum->map, write_map_foreach, (st_data_t) & args);
- }
- if (!args.rval) {
- rval = enc->write_long(writer, 0);
- if (rval) {
- return rval;
- }
- return 0;
- }
- return args.rval;
-}
-
-static int
-write_array(avro_writer_t writer, const avro_encoding_t * enc,
- struct avro_array_schema_t *schema,
- struct avro_array_datum_t *array)
-{
- int rval;
- struct avro_array_element_t *el;
-
- if (array->num_elements) {
- rval = enc->write_long(writer, array->num_elements);
- if (rval) {
- return rval;
- }
- for (el = STAILQ_FIRST(&array->els);
- el != NULL; el = STAILQ_NEXT(el, els)) {
- rval =
- avro_write_data(writer, schema->items, el->datum);
- if (rval) {
- return rval;
- }
- }
- }
- return enc->write_long(writer, 0);
-}
-
-int
-avro_write_data(avro_writer_t writer, avro_schema_t writer_schema,
- avro_datum_t datum)
-{
- const avro_encoding_t *enc = &avro_binary_encoding;
- int rval = -1;
-
- if (!writer || !(is_avro_schema(writer_schema) && is_avro_datum(datum))) {
- return EINVAL;
- }
- if (!avro_schema_datum_validate(writer_schema, datum)) {
- return EINVAL;
- }
- switch (avro_typeof(writer_schema)) {
- case AVRO_NULL:
- rval = enc->write_null(writer);
- break;
- case AVRO_BOOLEAN:
- rval =
- enc->write_boolean(writer, avro_datum_to_boolean(datum)->i);
- break;
- case AVRO_STRING:
- rval =
- enc->write_string(writer, avro_datum_to_string(datum)->s);
- break;
- case AVRO_BYTES:
- rval =
- enc->write_bytes(writer, avro_datum_to_bytes(datum)->bytes,
- avro_datum_to_bytes(datum)->size);
- break;
- case AVRO_INT:
- {
- int32_t i;
- if (is_avro_int(datum)) {
- i = avro_datum_to_int(datum)->i;
- } else if (is_avro_long(datum)) {
- i = (int32_t) avro_datum_to_long(datum)->l;
- } else {
- assert(0
- &&
- "Serious bug in schema validation code");
- }
- rval = enc->write_int(writer, i);
- }
- break;
- case AVRO_LONG:
- rval = enc->write_long(writer, avro_datum_to_long(datum)->l);
- break;
- case AVRO_FLOAT:
- {
- float f;
- if (is_avro_int(datum)) {
- f = (float)(avro_datum_to_int(datum)->i);
- } else if (is_avro_long(datum)) {
- f = (float)(avro_datum_to_long(datum)->l);
- } else if (is_avro_float(datum)) {
- f = avro_datum_to_float(datum)->f;
- } else if (is_avro_double(datum)) {
- f = (float)(avro_datum_to_double(datum)->d);
- } else {
- assert(0
- &&
- "Serious bug in schema validation code");
- }
- rval = enc->write_float(writer, f);
- }
- break;
- case AVRO_DOUBLE:
- {
- double d;
- if (is_avro_int(datum)) {
- d = (double)(avro_datum_to_int(datum)->i);
- } else if (is_avro_long(datum)) {
- d = (double)(avro_datum_to_long(datum)->l);
- } else if (is_avro_float(datum)) {
- d = (double)(avro_datum_to_float(datum)->f);
- } else if (is_avro_double(datum)) {
- d = avro_datum_to_double(datum)->d;
- } else {
- assert(0 && "Bug in schema validation code");
- }
- rval = enc->write_double(writer, d);
- }
- break;
-
- case AVRO_RECORD:
- rval = write_record(writer, enc, writer_schema, datum);
- break;
- case AVRO_ENUM:
- rval = write_enum(writer, enc, writer_schema, datum);
- break;
- case AVRO_FIXED:
- rval = write_fixed(writer, enc, writer_schema, datum);
- break;
- case AVRO_MAP:
- rval =
- write_map(writer, enc, avro_schema_to_map(writer_schema),
- avro_datum_to_map(datum));
- break;
- case AVRO_ARRAY:
- rval =
- write_array(writer, enc,
- avro_schema_to_array(writer_schema),
- avro_datum_to_array(datum));
- break;
-
- case AVRO_UNION:
- {
- assert(0 && "Bug in schema validation code");
- }
- break;
-
- case AVRO_LINK:
- rval =
- avro_write_data(writer,
- (avro_schema_to_link(writer_schema))->to,
- datum);
- break;
- }
- return rval;
-}
Added: hadoop/avro/trunk/lang/c/src/datum_equal.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_equal.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_equal.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_equal.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include <string.h>
+#include "datum.h"
+
+/*
+ * Compare two array datums element by element.
+ *
+ * Returns 1 when both arrays report the same num_elements and every
+ * pair of elements at the same position compares equal according to
+ * avro_datum_equal(); returns 0 otherwise.
+ */
+static int
+array_equal(struct avro_array_datum_t *a, struct avro_array_datum_t *b)
+{
+	struct avro_array_element_t *a_el, *b_el;
+
+	if (a->num_elements != b->num_elements) {
+		return 0;
+	}
+	/*
+	 * Walk both lists in lock step and stop only once BOTH are
+	 * exhausted, so that a list which ends early is caught by the
+	 * NULL check inside the loop instead of being silently accepted.
+	 */
+	for (a_el = STAILQ_FIRST(&a->els), b_el = STAILQ_FIRST(&b->els);
+	     !(a_el == NULL && b_el == NULL);
+	     a_el = STAILQ_NEXT(a_el, els), b_el = STAILQ_NEXT(b_el, els)) {
+		if (a_el == NULL || b_el == NULL) {
+			return 0;	/* one list is shorter than the other */
+		}
+		if (!avro_datum_equal(a_el->datum, b_el->datum)) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * State handed to st_equal_foreach() while one st_table is walked and
+ * compared against another.
+ */
+struct st_equal_args {
+ int rval; /* callers start this at 1; cleared to 0 on the first mismatch */
+ st_table *st; /* the other table, in which each visited key is looked up */
+};
+
+/*
+ * st_foreach() callback: look the visited key up in args->st and compare
+ * the datum stored there with the visited datum.
+ *
+ * Returns ST_CONTINUE while entries match.  When the key is missing from
+ * args->st, or the two datums differ, args->rval is set to 0 and ST_STOP
+ * is returned.
+ */
+static int
+st_equal_foreach(char *key, avro_datum_t datum, struct st_equal_args *args)
+{
+	/* st_lookup() fills an st_data_t slot; the union reads it back as a datum */
+	union {
+		avro_datum_t match;
+		st_data_t slot;
+	} other;
+
+	if (st_lookup(args->st, (st_data_t) key, &other.slot)
+	    && avro_datum_equal(datum, other.match)) {
+		return ST_CONTINUE;
+	}
+	args->rval = 0;
+	return ST_STOP;
+}
+
+/*
+ * Compare two map datums.
+ *
+ * Returns 0 straight away when the entry counts differ.  Otherwise every
+ * entry of a is checked against b through st_equal_foreach(), and the
+ * result is 1 only if no entry was reported as a mismatch.
+ */
+static int map_equal(struct avro_map_datum_t *a, struct avro_map_datum_t *b)
+{
+	struct st_equal_args cmp;
+
+	if (a->map->num_entries != b->map->num_entries) {
+		return 0;
+	}
+	cmp.rval = 1;
+	cmp.st = b->map;
+	st_foreach(a->map, st_equal_foreach, (st_data_t) & cmp);
+	return cmp.rval;
+}
+
+/*
+ * Compare two record datums by their field tables.
+ *
+ * Returns 0 when the number of fields differs.  Otherwise every field of
+ * a is looked up in b through st_equal_foreach(), and the result is 1
+ * only if each field exists in b with an equal datum.
+ *
+ * This routine is silent: it writes nothing to stderr, so comparing
+ * records has no visible side effect for the calling program.
+ */
+static int record_equal(struct avro_record_datum_t *a,
+			struct avro_record_datum_t *b)
+{
+	struct st_equal_args args = { 1, b->fields };
+
+	if (a->fields->num_entries != b->fields->num_entries) {
+		return 0;
+	}
+	st_foreach(a->fields, st_equal_foreach, (st_data_t) & args);
+	return args.rval;
+}
+
+/*
+ * Deep equality test for two datums.
+ *
+ * Returns 1 when a and b are both datums of the same type holding equal
+ * values, and 0 otherwise.  Arrays, maps and records are compared
+ * recursively through their helper routines.  Enum, fixed, union and
+ * link datums are not compared yet and always yield 0.
+ */
+int avro_datum_equal(avro_datum_t a, avro_datum_t b)
+{
+	int equal = 0;
+
+	if (!is_avro_datum(a) || !is_avro_datum(b)) {
+		return 0;
+	}
+	if (avro_typeof(a) != avro_typeof(b)) {
+		return 0;
+	}
+
+	switch (avro_typeof(a)) {
+	case AVRO_STRING:
+		equal = (strcmp(avro_datum_to_string(a)->s,
+				avro_datum_to_string(b)->s) == 0);
+		break;
+
+	case AVRO_BYTES:
+		/* only compare contents once the sizes are known to agree */
+		if (avro_datum_to_bytes(a)->size ==
+		    avro_datum_to_bytes(b)->size) {
+			equal = (memcmp(avro_datum_to_bytes(a)->bytes,
+					avro_datum_to_bytes(b)->bytes,
+					avro_datum_to_bytes(a)->size) == 0);
+		}
+		break;
+
+	case AVRO_INT:
+		equal = (avro_datum_to_int(a)->i == avro_datum_to_int(b)->i);
+		break;
+
+	case AVRO_LONG:
+		equal = (avro_datum_to_long(a)->l == avro_datum_to_long(b)->l);
+		break;
+
+	case AVRO_FLOAT:
+		equal = (avro_datum_to_float(a)->f ==
+			 avro_datum_to_float(b)->f);
+		break;
+
+	case AVRO_DOUBLE:
+		equal = (avro_datum_to_double(a)->d ==
+			 avro_datum_to_double(b)->d);
+		break;
+
+	case AVRO_BOOLEAN:
+		equal = (avro_datum_to_boolean(a)->i ==
+			 avro_datum_to_boolean(b)->i);
+		break;
+
+	case AVRO_NULL:
+		/* the types already match and null carries no value */
+		equal = 1;
+		break;
+
+	case AVRO_ARRAY:
+		equal = array_equal(avro_datum_to_array(a),
+				    avro_datum_to_array(b));
+		break;
+
+	case AVRO_MAP:
+		equal = map_equal(avro_datum_to_map(a), avro_datum_to_map(b));
+		break;
+
+	case AVRO_RECORD:
+		equal = record_equal(avro_datum_to_record(a),
+				     avro_datum_to_record(b));
+		break;
+
+	case AVRO_ENUM:
+	case AVRO_FIXED:
+	case AVRO_UNION:
+	case AVRO_LINK:
+		/*
+		 * TODO: not implemented; these types never compare equal
+		 */
+		equal = 0;
+		break;
+	}
+	return equal;
+}
Added: hadoop/avro/trunk/lang/c/src/datum_read.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_read.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_read.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_read.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,409 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+#include <errno.h>
+#include <string.h>
+#include "encoding.h"
+#include "schema.h"
+#include "datum.h"
+
+/*
+ * Decide whether data written with writers_schema may be read using
+ * readers_schema.  Returns 1 when the pair is acceptable and 0 when it
+ * is not, or when either argument is not a schema.
+ *
+ * Only the top level is inspected: records and enums are compared by
+ * name, fixed by name and size, and maps/arrays only by the type tag of
+ * their value/item schema.  A union writer is accepted unconditionally;
+ * a link writer is never accepted (TODO).
+ */
+int
+avro_schema_match(avro_schema_t writers_schema, avro_schema_t readers_schema)
+{
+	if (!(is_avro_schema(writers_schema)
+	      && is_avro_schema(readers_schema))) {
+		return 0;
+	}
+
+	switch (avro_typeof(writers_schema)) {
+	case AVRO_UNION:
+		return 1;
+
+		/*
+		 * Numeric promotion: int -> long -> float -> double.  Each
+		 * case accepts its own type and then falls through to the
+		 * wider types.
+		 */
+	case AVRO_INT:
+		if (is_avro_int(readers_schema)) {
+			return 1;
+		}
+		/* fall through */
+	case AVRO_LONG:
+		if (is_avro_long(readers_schema)) {
+			return 1;
+		}
+		/* fall through */
+	case AVRO_FLOAT:
+		if (is_avro_float(readers_schema)) {
+			return 1;
+		}
+		return is_avro_double(readers_schema) ? 1 : 0;
+
+	case AVRO_STRING:
+	case AVRO_BYTES:
+	case AVRO_DOUBLE:
+	case AVRO_BOOLEAN:
+	case AVRO_NULL:
+		/* no promotion: the reader must use the very same type */
+		return avro_typeof(readers_schema) ==
+		    avro_typeof(writers_schema);
+
+	case AVRO_RECORD:
+		if (!is_avro_record(readers_schema)) {
+			return 0;
+		}
+		return strcmp(avro_schema_name(writers_schema),
+			      avro_schema_name(readers_schema)) == 0;
+
+	case AVRO_FIXED:
+		if (!is_avro_fixed(readers_schema)) {
+			return 0;
+		}
+		if (strcmp(avro_schema_name(writers_schema),
+			   avro_schema_name(readers_schema)) != 0) {
+			return 0;
+		}
+		return (avro_schema_to_fixed(writers_schema))->size ==
+		    (avro_schema_to_fixed(readers_schema))->size;
+
+	case AVRO_ENUM:
+		if (!is_avro_enum(readers_schema)) {
+			return 0;
+		}
+		return strcmp(avro_schema_to_enum(writers_schema)->name,
+			      avro_schema_to_enum(readers_schema)->name) == 0;
+
+	case AVRO_MAP:
+		if (!is_avro_map(readers_schema)) {
+			return 0;
+		}
+		return avro_typeof(avro_schema_to_map(writers_schema)->values)
+		    == avro_typeof(avro_schema_to_map(readers_schema)->values);
+
+	case AVRO_ARRAY:
+		if (!is_avro_array(readers_schema)) {
+			return 0;
+		}
+		return avro_typeof(avro_schema_to_array(writers_schema)->items)
+		    == avro_typeof(avro_schema_to_array(readers_schema)->items);
+
+	case AVRO_LINK:
+		/*
+		 * TODO
+		 */
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Placeholder for decoding a fixed value: nothing is read and *datum is
+ * left untouched.  Always returns 1, which avro_read_data() hands back
+ * to its caller as a non-zero result.
+ */
+static int
+read_fixed(avro_reader_t reader, const avro_encoding_t * enc,
+	   avro_schema_t writers_schema, avro_schema_t readers_schema,
+	   avro_datum_t * datum)
+{
+	/* not implemented yet: none of the arguments are used */
+	(void)reader;
+	(void)enc;
+	(void)writers_schema;
+	(void)readers_schema;
+	(void)datum;
+	return 1;
+}
+
+/*
+ * Placeholder for decoding an enum value: nothing is read and *datum is
+ * left untouched.  Always returns 1, which avro_read_data() hands back
+ * to its caller as a non-zero result.
+ */
+static int
+read_enum(avro_reader_t reader, const avro_encoding_t * enc,
+	  avro_schema_t writers_schema, avro_schema_t readers_schema,
+	  avro_datum_t * datum)
+{
+	/* not implemented yet: none of the arguments are used */
+	(void)reader;
+	(void)enc;
+	(void)writers_schema;
+	(void)readers_schema;
+	(void)datum;
+	return 1;
+}
+
+/*
+ * Decode an array from reader into a freshly created array datum.
+ *
+ * The encoding is a sequence of blocks.  Each block starts with a long
+ * item count; a negative count is followed by a long block size, which
+ * is read and ignored here, and the absolute value is used as the
+ * count.  A count of zero ends the array.  Every item is decoded with
+ * avro_read_data() using the item schemas of the writer and reader.
+ *
+ * On success stores the array in *datum and returns 0.  On failure
+ * returns the non-zero code of the failing step (ENOMEM when the array
+ * cannot be created, EINVAL for a count that cannot be negated),
+ * releases the partially built array and leaves *datum untouched.
+ */
+static int
+read_array(avro_reader_t reader, const avro_encoding_t * enc,
+	   struct avro_array_schema_t *writers_schema,
+	   struct avro_array_schema_t *readers_schema, avro_datum_t * datum)
+{
+	int rval;
+	int64_t i;
+	int64_t block_count;
+	int64_t block_size;
+	avro_datum_t array_datum;
+
+	rval = enc->read_long(reader, &block_count);
+	if (rval) {
+		return rval;
+	}
+
+	array_datum = avro_array();
+	if (!array_datum) {
+		return ENOMEM;
+	}
+
+	while (block_count != 0) {
+		if (block_count < 0) {
+			/* -INT64_MIN is not representable: reject it */
+			if (block_count == INT64_MIN) {
+				rval = EINVAL;
+				goto fail;
+			}
+			block_count = -block_count;
+			rval = enc->read_long(reader, &block_size);
+			if (rval) {
+				goto fail;
+			}
+		}
+
+		for (i = 0; i < block_count; i++) {
+			/* named so it does not shadow the out-parameter */
+			avro_datum_t element;
+
+			rval =
+			    avro_read_data(reader, writers_schema->items,
+					   readers_schema->items, &element);
+			if (rval) {
+				goto fail;
+			}
+			/*
+			 * NOTE(review): if the append fails the element is
+			 * not released here; confirm who owns it in that
+			 * case.
+			 */
+			rval = avro_array_append_datum(array_datum, element);
+			if (rval) {
+				goto fail;
+			}
+		}
+
+		rval = enc->read_long(reader, &block_count);
+		if (rval) {
+			goto fail;
+		}
+	}
+	*datum = array_datum;
+	return 0;
+
+      fail:
+	/* drop the partially built array on every error path */
+	avro_datum_decref(array_datum);
+	return rval;
+}
+
+/*
+ * Decode a map from reader into a freshly created map datum.
+ *
+ * The encoding is a sequence of blocks.  Each block starts with a long
+ * entry count; a negative count is followed by a long block size, which
+ * is read and ignored here, and the absolute value is used as the
+ * count.  A count of zero ends the map.  Every entry is a string key
+ * followed by a value decoded with avro_read_data() using the value
+ * schemas of the writer and reader.
+ *
+ * On success stores the map in *datum and returns 0.  On failure
+ * returns the non-zero code of the failing step (ENOMEM when the map
+ * cannot be created, EINVAL for a count that cannot be negated),
+ * releases the partially built map and leaves *datum untouched.
+ */
+static int
+read_map(avro_reader_t reader, const avro_encoding_t * enc,
+	 struct avro_map_schema_t *writers_schema,
+	 struct avro_map_schema_t *readers_schema, avro_datum_t * datum)
+{
+	int rval;
+	int64_t i, block_count;
+	avro_datum_t map;
+
+	rval = enc->read_long(reader, &block_count);
+	if (rval) {
+		return rval;
+	}
+
+	/* created only after the first read so an early failure leaks nothing */
+	map = avro_map();
+	if (!map) {
+		return ENOMEM;
+	}
+
+	while (block_count != 0) {
+		if (block_count < 0) {
+			int64_t block_size;
+
+			/* -INT64_MIN is not representable: reject it */
+			if (block_count == INT64_MIN) {
+				rval = EINVAL;
+				goto fail;
+			}
+			block_count = -block_count;
+			rval = enc->read_long(reader, &block_size);
+			if (rval) {
+				goto fail;
+			}
+		}
+		for (i = 0; i < block_count; i++) {
+			char *key;
+			avro_datum_t value;
+
+			/*
+			 * NOTE(review): ownership of key and value after
+			 * avro_map_set(), and on the error paths below, is
+			 * not visible here; confirm neither is leaked.
+			 */
+			rval = enc->read_string(reader, &key);
+			if (rval) {
+				goto fail;
+			}
+			/*
+			 * The arguments already are map schema structs, so
+			 * their value schema is read directly.
+			 */
+			rval =
+			    avro_read_data(reader, writers_schema->values,
+					   readers_schema->values, &value);
+			if (rval) {
+				goto fail;
+			}
+			rval = avro_map_set(map, key, value);
+			if (rval) {
+				goto fail;
+			}
+		}
+		rval = enc->read_long(reader, &block_count);
+		if (rval) {
+			goto fail;
+		}
+	}
+	*datum = map;
+	return 0;
+
+      fail:
+	/* drop the partially built map on every error path */
+	avro_datum_decref(map);
+	return rval;
+}
+
+/*
+ * Placeholder for decoding a union value: nothing is read and *datum is
+ * left untouched.  Always returns 1, which avro_read_data() hands back
+ * to its caller as a non-zero result.
+ */
+static int
+read_union(avro_reader_t reader, const avro_encoding_t * enc,
+	   avro_schema_t writers_schema, avro_schema_t readers_schema,
+	   avro_datum_t * datum)
+{
+	/* not implemented yet: none of the arguments are used */
+	(void)reader;
+	(void)enc;
+	(void)writers_schema;
+	(void)readers_schema;
+	(void)datum;
+	return 1;
+}
+
+/*
+ * Decode a record by walking the writer's fields in declaration order.
+ *
+ * A new record datum named after the writer's schema is created and
+ * stored in *datum before any field is read, so *datum is set even when
+ * decoding fails part-way.  For each writer field the reader's field of
+ * the same name is looked up by a linear scan; the value is decoded
+ * with both field types and stored in the record under the writer's
+ * field name.
+ *
+ * Returns 0 on success, the non-zero code of a failing read/set, or -1
+ * when the reader's schema lacks a field the writer wrote (skipping is
+ * not implemented).  The enc argument is not used.
+ *
+ * TODO: handle default values in fields
+ */
+static int
+read_record(avro_reader_t reader, const avro_encoding_t * enc,
+	    struct avro_record_schema_t *writers_schema,
+	    struct avro_record_schema_t *readers_schema, avro_datum_t * datum)
+{
+	struct avro_record_field_t *wfield;
+	avro_datum_t record;
+
+	record = avro_record(writers_schema->name);
+	*datum = record;
+
+	wfield = STAILQ_FIRST(&writers_schema->fields);
+	while (wfield != NULL) {
+		struct avro_record_field_t *rfield;
+		avro_datum_t value;
+		int rc;
+
+		/* find the reader's field carrying the same name */
+		rfield = STAILQ_FIRST(&readers_schema->fields);
+		while (rfield != NULL
+		       && strcmp(wfield->name, rfield->name) != 0) {
+			rfield = STAILQ_NEXT(rfield, fields);
+		}
+		if (rfield == NULL) {
+			/* TODO: skip_record */
+			return -1;
+		}
+
+		rc = avro_read_data(reader, wfield->type, rfield->type,
+				    &value);
+		if (rc) {
+			return rc;
+		}
+		rc = avro_record_field_set(record, wfield->name, value);
+		if (rc) {
+			return rc;
+		}
+
+		wfield = STAILQ_NEXT(wfield, fields);
+	}
+	return 0;
+}
+
+/*
+ * Decode one datum from reader using the binary encoding.
+ *
+ * reader          source of the encoded bytes; must not be NULL
+ * writers_schema  schema the data was written with; must be a schema
+ * readers_schema  schema the caller wants to read with, or NULL to
+ *                 reuse writers_schema
+ * datum           out-parameter receiving the decoded datum
+ *
+ * Schema resolution: when the writer's schema is not a union but the
+ * reader's is, the first reader branch accepted by avro_schema_match()
+ * is used.  This has to happen before the top-level match test,
+ * because avro_schema_match() rejects every non-union writer paired
+ * with a union reader.  In all other cases the two schemas must
+ * satisfy avro_schema_match().
+ *
+ * Returns 0 on success, EINVAL for bad arguments or schemas that do not
+ * match, or the non-zero result of the failing decode step.  For the
+ * primitive types *datum is only assigned when the decode succeeded.
+ */
+int
+avro_read_data(avro_reader_t reader, avro_schema_t writers_schema,
+	       avro_schema_t readers_schema, avro_datum_t * datum)
+{
+	int rval = EINVAL;
+	const avro_encoding_t *enc = &avro_binary_encoding;
+
+	if (!reader || !is_avro_schema(writers_schema) || !datum) {
+		return EINVAL;
+	}
+
+	if (readers_schema == NULL) {
+		readers_schema = writers_schema;
+	} else if (!is_avro_schema(readers_schema)) {
+		return EINVAL;
+	} else if (!is_avro_union(writers_schema)
+		   && is_avro_union(readers_schema)) {
+		/*
+		 * schema resolution: read with the first branch of the
+		 * reader's union that matches the writer's schema
+		 */
+		struct avro_union_branch_t *branch;
+		struct avro_union_schema_t *union_schema =
+		    avro_schema_to_union(readers_schema);
+
+		for (branch = STAILQ_FIRST(&union_schema->branches);
+		     branch != NULL; branch = STAILQ_NEXT(branch, branches)) {
+			if (avro_schema_match(writers_schema, branch->schema)) {
+				return avro_read_data(reader, writers_schema,
+						      branch->schema, datum);
+			}
+		}
+		return EINVAL;
+	} else if (!avro_schema_match(writers_schema, readers_schema)) {
+		return EINVAL;
+	}
+
+	/*
+	 * The primitive cases build the datum only after a successful
+	 * read; otherwise the local would be used uninitialized.
+	 */
+	switch (avro_typeof(writers_schema)) {
+	case AVRO_NULL:
+		rval = enc->read_null(reader);
+		if (!rval) {
+			*datum = avro_null();
+		}
+		break;
+
+	case AVRO_BOOLEAN:
+		{
+			int8_t b;
+			rval = enc->read_boolean(reader, &b);
+			if (!rval) {
+				*datum = avro_boolean(b);
+			}
+		}
+		break;
+
+	case AVRO_STRING:
+		{
+			char *s;
+			rval = enc->read_string(reader, &s);
+			if (!rval) {
+				*datum = avro_string(s);
+			}
+		}
+		break;
+
+	case AVRO_INT:
+		{
+			int32_t i;
+			rval = enc->read_int(reader, &i);
+			if (!rval) {
+				*datum = avro_int(i);
+			}
+		}
+		break;
+
+	case AVRO_LONG:
+		{
+			int64_t l;
+			rval = enc->read_long(reader, &l);
+			if (!rval) {
+				*datum = avro_long(l);
+			}
+		}
+		break;
+
+	case AVRO_FLOAT:
+		{
+			float f;
+			rval = enc->read_float(reader, &f);
+			if (!rval) {
+				*datum = avro_float(f);
+			}
+		}
+		break;
+
+	case AVRO_DOUBLE:
+		{
+			double d;
+			rval = enc->read_double(reader, &d);
+			if (!rval) {
+				*datum = avro_double(d);
+			}
+		}
+		break;
+
+	case AVRO_BYTES:
+		{
+			char *bytes;
+			int64_t len;
+			rval = enc->read_bytes(reader, &bytes, &len);
+			if (!rval) {
+				*datum = avro_bytes(bytes, len);
+			}
+		}
+		break;
+
+	case AVRO_FIXED:
+		rval =
+		    read_fixed(reader, enc, writers_schema, readers_schema,
+			       datum);
+		break;
+
+	case AVRO_ENUM:
+		rval =
+		    read_enum(reader, enc, writers_schema, readers_schema,
+			      datum);
+		break;
+
+	case AVRO_ARRAY:
+		rval =
+		    read_array(reader, enc,
+			       avro_schema_to_array(writers_schema),
+			       avro_schema_to_array(readers_schema), datum);
+		break;
+
+	case AVRO_MAP:
+		rval =
+		    read_map(reader, enc, avro_schema_to_map(writers_schema),
+			     avro_schema_to_map(readers_schema), datum);
+		break;
+
+	case AVRO_UNION:
+		rval =
+		    read_union(reader, enc, writers_schema, readers_schema,
+			       datum);
+		break;
+
+	case AVRO_RECORD:
+		rval =
+		    read_record(reader, enc,
+				avro_schema_to_record(writers_schema),
+				avro_schema_to_record(readers_schema), datum);
+		break;
+
+	case AVRO_LINK:
+		/* follow the link and decode with the schema it points to */
+		rval =
+		    avro_read_data(reader,
+				   (avro_schema_to_link(writers_schema))->to,
+				   readers_schema, datum);
+		break;
+	}
+
+	return rval;
+}
Added: hadoop/avro/trunk/lang/c/src/datum_validate.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_validate.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_validate.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_validate.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+#include <limits.h>
+#include <errno.h>
+#include <string.h>
+#include "schema.h"
+#include "datum.h"
+#include "st.h"
+
+/*
+ * State handed to schema_map_validate_foreach() through st_foreach()
+ * while the values of a map datum are being validated.
+ */
+struct validate_st {
+ avro_schema_t expected_schema;	/* schema every map value is checked against */
+ int rval;	/* set to 0 by the callback when a value fails validation */
+};
+
+/*
+ * st_foreach() callback used while validating a map datum: checks one
+ * value against vst->expected_schema.  On the first value that fails,
+ * vst->rval is cleared and the iteration is stopped; otherwise the
+ * iteration continues.  The key is not inspected.
+ */
+static int
+schema_map_validate_foreach(char *key, avro_datum_t datum,
+			    struct validate_st *vst)
+{
+	(void)key;
+
+	if (avro_schema_datum_validate(vst->expected_schema, datum)) {
+		return ST_CONTINUE;
+	}
+	vst->rval = 0;
+	return ST_STOP;
+}
+
+/*
+ * Check whether datum may be written using expected_schema.
+ *
+ * Returns 1 when the datum is acceptable and 0 when it is not.  Invalid
+ * arguments also yield 0: every caller in this code uses the result as
+ * a boolean, so returning an errno value such as EINVAL would be read
+ * as "valid".
+ *
+ * Numeric datums may be promoted: an int schema also takes a long datum
+ * whose value fits in an int, a long schema takes int, a float schema
+ * takes int and long, and a double schema takes int, long and float.
+ * Arrays, maps and records are validated recursively; a union accepts
+ * the datum if any branch does; a link is followed to its target.
+ */
+int
+avro_schema_datum_validate(avro_schema_t expected_schema, avro_datum_t datum)
+{
+	if (!is_avro_schema(expected_schema) || !is_avro_datum(datum)) {
+		return 0;
+	}
+
+	switch (avro_typeof(expected_schema)) {
+	case AVRO_NULL:
+		return is_avro_null(datum);
+
+	case AVRO_BOOLEAN:
+		return is_avro_boolean(datum);
+
+	case AVRO_STRING:
+		return is_avro_string(datum);
+
+	case AVRO_BYTES:
+		return is_avro_bytes(datum);
+
+	case AVRO_INT:
+		/* a long datum is fine as long as its value fits in an int */
+		return is_avro_int(datum)
+		    || (is_avro_long(datum)
+			&& (INT_MIN <= avro_datum_to_long(datum)->l
+			    && avro_datum_to_long(datum)->l <= INT_MAX));
+
+	case AVRO_LONG:
+		return is_avro_int(datum) || is_avro_long(datum);
+
+	case AVRO_FLOAT:
+		return is_avro_int(datum) || is_avro_long(datum)
+		    || is_avro_float(datum);
+
+	case AVRO_DOUBLE:
+		return is_avro_int(datum) || is_avro_long(datum)
+		    || is_avro_float(datum) || is_avro_double(datum);
+
+	case AVRO_FIXED:
+		return (is_avro_fixed(datum)
+			&& (avro_schema_to_fixed(expected_schema)->size ==
+			    avro_datum_to_fixed(datum)->size));
+
+	case AVRO_ENUM:
+		{
+			struct avro_enum_schema_t *enump;
+			struct avro_enum_symbol_t *symbol;
+
+			/*
+			 * The datum must be an enum before it is viewed as
+			 * one; union validation tries every branch with
+			 * the same datum, so other types do arrive here.
+			 */
+			if (!is_avro_enum(datum)) {
+				return 0;
+			}
+			enump = avro_schema_to_enum(expected_schema);
+			symbol = STAILQ_FIRST(&enump->symbols);
+			while (symbol) {
+				if (!strcmp
+				    (symbol->symbol,
+				     avro_datum_to_enum(datum)->symbol)) {
+					return 1;
+				}
+				symbol = STAILQ_NEXT(symbol, symbols);
+			}
+			return 0;
+		}
+		break;
+
+	case AVRO_ARRAY:
+		{
+			if (is_avro_array(datum)) {
+				struct avro_array_datum_t *array =
+				    avro_datum_to_array(datum);
+				struct avro_array_element_t *el =
+				    STAILQ_FIRST(&array->els);
+				/* every element must match the item schema */
+				while (el) {
+					if (!avro_schema_datum_validate
+					    ((avro_schema_to_array
+					      (expected_schema))->items,
+					     el->datum)) {
+						return 0;
+					}
+					el = STAILQ_NEXT(el, els);
+				}
+				return 1;
+			}
+			return 0;
+		}
+		break;
+
+	case AVRO_MAP:
+		if (is_avro_map(datum)) {
+			/* rval starts at 1; the callback clears it on a bad value */
+			struct validate_st vst =
+			    { avro_schema_to_map(expected_schema)->values, 1 };
+			st_foreach(avro_datum_to_map(datum)->map,
+				   schema_map_validate_foreach,
+				   (st_data_t) & vst);
+			return vst.rval;
+		}
+		break;
+
+	case AVRO_UNION:
+		{
+			struct avro_union_schema_t *union_schema =
+			    avro_schema_to_union(expected_schema);
+			struct avro_union_branch_t *branch;
+
+			/* valid if any branch of the union accepts the datum */
+			for (branch = STAILQ_FIRST(&union_schema->branches);
+			     branch != NULL;
+			     branch = STAILQ_NEXT(branch, branches)) {
+				if (avro_schema_datum_validate
+				    (branch->schema, datum)) {
+					return 1;
+				}
+			}
+			return 0;
+		}
+		break;
+
+	case AVRO_RECORD:
+		if (is_avro_record(datum)) {
+			struct avro_record_schema_t *record_schema =
+			    avro_schema_to_record(expected_schema);
+			struct avro_record_field_t *field;
+			/* every schema field must be present and valid */
+			for (field = STAILQ_FIRST(&record_schema->fields);
+			     field != NULL;
+			     field = STAILQ_NEXT(field, fields)) {
+				avro_datum_t field_datum =
+				    avro_record_field_get(datum, field->name);
+				if (!field_datum) {
+					/*
+					 * TODO: check for default values
+					 */
+					return 0;
+				}
+				if (!avro_schema_datum_validate
+				    (field->type, field_datum)) {
+					return 0;
+				}
+			}
+			return 1;
+		}
+		break;
+
+	case AVRO_LINK:
+		{
+			/* validate against the schema the link points to */
+			return
+			    avro_schema_datum_validate((avro_schema_to_link
+							(expected_schema))->to,
+						       datum);
+		}
+		break;
+	}
+	return 0;
+}
Added: hadoop/avro/trunk/lang/c/src/datum_write.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_write.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_write.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_write.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+#include <errno.h>
+#include <assert.h>
+#include "schema.h"
+#include "datum.h"
+#include "encoding.h"
+
+/*
+ * Encode a record by writing each field of the record schema in
+ * declaration order.  The value of every field is looked up in datum by
+ * name and written with avro_write_data() using the field's type.
+ *
+ * Returns 0 when all fields were written, otherwise the non-zero result
+ * of the first field that failed.  The enc argument is not used.
+ */
+static int
+write_record(avro_writer_t writer, const avro_encoding_t * enc,
+	     struct avro_record_schema_t *record, avro_datum_t datum)
+{
+	struct avro_record_field_t *field = STAILQ_FIRST(&record->fields);
+
+	while (field != NULL) {
+		avro_datum_t value =
+		    avro_record_field_get(datum, field->name);
+		int rc = avro_write_data(writer, field->type, value);
+
+		if (rc) {
+			return rc;
+		}
+		field = STAILQ_NEXT(field, fields);
+	}
+	return 0;
+}
+
+/*
+ * Placeholder for encoding an enum datum: nothing is written and
+ * EINVAL is always returned.
+ */
+static int
+write_enum(avro_writer_t writer, const avro_encoding_t * enc,
+	   avro_schema_t writer_schema, avro_datum_t datum)
+{
+	/*
+	 * TODO: not implemented, so none of the arguments are used
+	 */
+	(void)writer;
+	(void)enc;
+	(void)writer_schema;
+	(void)datum;
+	return EINVAL;
+}
+
+/*
+ * Placeholder for encoding a fixed datum: nothing is written and
+ * EINVAL is always returned.
+ */
+static int
+write_fixed(avro_writer_t writer, const avro_encoding_t * enc,
+	    avro_schema_t writer_schema, avro_datum_t datum)
+{
+	/*
+	 * TODO: not implemented, so none of the arguments are used
+	 */
+	(void)writer;
+	(void)enc;
+	(void)writer_schema;
+	(void)datum;
+	return EINVAL;
+}
+
+/*
+ * State handed to write_map_foreach() through st_foreach() while the
+ * entries of a map datum are being written.
+ */
+struct write_map_args {
+ int rval;	/* first non-zero write result seen by the callback, 0 if none */
+ avro_writer_t writer;	/* destination of the encoded entries */
+ const avro_encoding_t *enc;	/* encoding used to write each key */
+ avro_schema_t values_schema;	/* schema used to write each value */
+};
+
+/*
+ * st_foreach() callback that writes one map entry: the key as a string,
+ * then the value using args->values_schema.  When either write fails
+ * its result is recorded in args->rval and the iteration is stopped;
+ * otherwise the iteration continues.
+ */
+static int
+write_map_foreach(char *key, avro_datum_t datum, struct write_map_args *args)
+{
+	int rc = args->enc->write_string(args->writer, key);
+
+	/* the value is only written once the key went out successfully */
+	if (!rc) {
+		rc = avro_write_data(args->writer, args->values_schema,
+				     datum);
+	}
+	if (rc) {
+		args->rval = rc;
+		return ST_STOP;
+	}
+	return ST_CONTINUE;
+}
+
+/*
+ * Encode a map datum.  A non-empty map is written as one block: the
+ * entry count as a long followed by every key/value pair.  The map is
+ * then closed with a long zero; an empty map consists of that zero
+ * only.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing
+ * write.  The closing zero is not written when an entry failed.
+ */
+static int
+write_map(avro_writer_t writer, const avro_encoding_t * enc,
+	  struct avro_map_schema_t *writer_schema,
+	  struct avro_map_datum_t *datum)
+{
+	struct write_map_args args = { 0, writer, enc, writer_schema->values };
+
+	if (datum->map->num_entries) {
+		int rc = enc->write_long(writer, datum->map->num_entries);
+
+		if (rc) {
+			return rc;
+		}
+		st_foreach(datum->map, write_map_foreach, (st_data_t) & args);
+	}
+	if (args.rval) {
+		/* an entry failed: report it without closing the map */
+		return args.rval;
+	}
+	return enc->write_long(writer, 0);
+}
+
+/*
+ * Encode an array datum.  A non-empty array is written as one block:
+ * the element count as a long followed by every element encoded with
+ * the schema's item type.  The array is then closed with a long zero;
+ * an empty array consists of that zero only.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing
+ * write.
+ */
+static int
+write_array(avro_writer_t writer, const avro_encoding_t * enc,
+	    struct avro_array_schema_t *schema,
+	    struct avro_array_datum_t *array)
+{
+	struct avro_array_element_t *el;
+	int rc;
+
+	if (!array->num_elements) {
+		return enc->write_long(writer, 0);
+	}
+
+	rc = enc->write_long(writer, array->num_elements);
+	if (rc) {
+		return rc;
+	}
+
+	el = STAILQ_FIRST(&array->els);
+	while (el != NULL) {
+		rc = avro_write_data(writer, schema->items, el->datum);
+		if (rc) {
+			return rc;
+		}
+		el = STAILQ_NEXT(el, els);
+	}
+	return enc->write_long(writer, 0);
+}
+
+/*
+ * Encode datum to writer with the binary encoding, as described by
+ * writer_schema.
+ *
+ * The datum is first checked with avro_schema_datum_validate().
+ * Numeric datums are converted to the schema's type before writing, so
+ * an int datum may be written with a long, float or double schema, and
+ * a long datum with an int, float or double schema.
+ *
+ * Returns 0 on success, EINVAL for bad arguments, for a datum that does
+ * not validate, and for types that cannot be written yet (enum, fixed,
+ * union), or the non-zero result of the failing write.
+ */
+int
+avro_write_data(avro_writer_t writer, avro_schema_t writer_schema,
+		avro_datum_t datum)
+{
+	const avro_encoding_t *enc = &avro_binary_encoding;
+	int rval = -1;
+
+	if (!writer || !(is_avro_schema(writer_schema) && is_avro_datum(datum))) {
+		return EINVAL;
+	}
+	if (!avro_schema_datum_validate(writer_schema, datum)) {
+		return EINVAL;
+	}
+	switch (avro_typeof(writer_schema)) {
+	case AVRO_NULL:
+		rval = enc->write_null(writer);
+		break;
+	case AVRO_BOOLEAN:
+		rval =
+		    enc->write_boolean(writer, avro_datum_to_boolean(datum)->i);
+		break;
+	case AVRO_STRING:
+		rval =
+		    enc->write_string(writer, avro_datum_to_string(datum)->s);
+		break;
+	case AVRO_BYTES:
+		rval =
+		    enc->write_bytes(writer, avro_datum_to_bytes(datum)->bytes,
+				     avro_datum_to_bytes(datum)->size);
+		break;
+	case AVRO_INT:
+		{
+			int32_t i;
+			if (is_avro_int(datum)) {
+				i = avro_datum_to_int(datum)->i;
+			} else if (is_avro_long(datum)) {
+				i = (int32_t) avro_datum_to_long(datum)->l;
+			} else {
+				assert(0
+				       &&
+				       "Serious bug in schema validation code");
+				/* with NDEBUG: never write an unset value */
+				return EINVAL;
+			}
+			rval = enc->write_int(writer, i);
+		}
+		break;
+	case AVRO_LONG:
+		{
+			/*
+			 * Validation lets an int datum through for a long
+			 * schema, so the datum's own type decides which
+			 * member is read.
+			 */
+			int64_t l;
+			if (is_avro_int(datum)) {
+				l = (int64_t) avro_datum_to_int(datum)->i;
+			} else if (is_avro_long(datum)) {
+				l = avro_datum_to_long(datum)->l;
+			} else {
+				assert(0
+				       &&
+				       "Serious bug in schema validation code");
+				return EINVAL;
+			}
+			rval = enc->write_long(writer, l);
+		}
+		break;
+	case AVRO_FLOAT:
+		{
+			float f;
+			if (is_avro_int(datum)) {
+				f = (float)(avro_datum_to_int(datum)->i);
+			} else if (is_avro_long(datum)) {
+				f = (float)(avro_datum_to_long(datum)->l);
+			} else if (is_avro_float(datum)) {
+				f = avro_datum_to_float(datum)->f;
+			} else if (is_avro_double(datum)) {
+				f = (float)(avro_datum_to_double(datum)->d);
+			} else {
+				assert(0
+				       &&
+				       "Serious bug in schema validation code");
+				return EINVAL;
+			}
+			rval = enc->write_float(writer, f);
+		}
+		break;
+	case AVRO_DOUBLE:
+		{
+			double d;
+			if (is_avro_int(datum)) {
+				d = (double)(avro_datum_to_int(datum)->i);
+			} else if (is_avro_long(datum)) {
+				d = (double)(avro_datum_to_long(datum)->l);
+			} else if (is_avro_float(datum)) {
+				d = (double)(avro_datum_to_float(datum)->f);
+			} else if (is_avro_double(datum)) {
+				d = avro_datum_to_double(datum)->d;
+			} else {
+				assert(0 && "Bug in schema validation code");
+				return EINVAL;
+			}
+			rval = enc->write_double(writer, d);
+		}
+		break;
+
+	case AVRO_RECORD:
+		rval =
+		    write_record(writer, enc,
+				 avro_schema_to_record(writer_schema), datum);
+		break;
+	case AVRO_ENUM:
+		rval = write_enum(writer, enc, writer_schema, datum);
+		break;
+	case AVRO_FIXED:
+		rval = write_fixed(writer, enc, writer_schema, datum);
+		break;
+	case AVRO_MAP:
+		rval =
+		    write_map(writer, enc, avro_schema_to_map(writer_schema),
+			      avro_datum_to_map(datum));
+		break;
+	case AVRO_ARRAY:
+		rval =
+		    write_array(writer, enc,
+				avro_schema_to_array(writer_schema),
+				avro_datum_to_array(datum));
+		break;
+
+	case AVRO_UNION:
+		/*
+		 * TODO: unions cannot be written yet.  Validation accepts
+		 * a datum that matches any branch, so this case is
+		 * reachable with valid input and must fail cleanly, like
+		 * the enum and fixed placeholders, instead of aborting.
+		 */
+		rval = EINVAL;
+		break;
+
+	case AVRO_LINK:
+		/* follow the link and write with the schema it points to */
+		rval =
+		    avro_write_data(writer,
+				    (avro_schema_to_link(writer_schema))->to,
+				    datum);
+		break;
+	}
+	return rval;
+}
Modified: hadoop/avro/trunk/lang/c/src/io.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/io.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/io.c (original)
+++ hadoop/avro/trunk/lang/c/src/io.c Fri Jan 22 00:38:01 2010
@@ -21,6 +21,7 @@
#include <string.h>
#include "avro.h"
#include "container_of.h"
+#include "dump.h"
enum avro_io_type_t {
AVRO_FILE_IO,
@@ -215,6 +216,22 @@
return -1;
}
+/*
+ * Debugging aid: pass the bytes written so far to dump() together with
+ * fp.  Only memory-backed writers are handled; for any other writer the
+ * call does nothing.
+ */
+void avro_writer_dump(avro_writer_t writer, FILE * fp)
+{
+	if (!is_memory_io(writer)) {
+		return;
+	}
+	dump(fp, (char *)avro_writer_to_memory(writer)->buf,
+	     avro_writer_to_memory(writer)->written);
+}
+
+/*
+ * Debugging aid: pass the bytes consumed so far to dump() together with
+ * fp.  Only memory-backed readers are handled; for any other reader the
+ * call does nothing.
+ */
+void avro_reader_dump(avro_reader_t reader, FILE * fp)
+{
+	if (!is_memory_io(reader)) {
+		return;
+	}
+	dump(fp, (char *)avro_reader_to_memory(reader)->buf,
+	     avro_reader_to_memory(reader)->read);
+}
+
void avro_reader_free(avro_reader_t reader)
{
if (is_memory_io(reader)) {
Modified: hadoop/avro/trunk/lang/c/src/schema.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/schema.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/schema.c (original)
+++ hadoop/avro/trunk/lang/c/src/schema.c Fri Jan 22 00:38:01 2010
@@ -307,9 +307,12 @@
find_named_schemas(const char *name, avro_schema_error_t * error)
{
st_table *st = (*error)->named_schemas;
- avro_schema_t schema;
- if (st_lookup(st, (st_data_t) name, (st_data_t *) & schema)) {
- return schema;
+ union {
+ avro_schema_t schema;
+ st_data_t data;
+ } val;
+ if (st_lookup(st, (st_data_t) name, &(val.data))) {
+ return val.schema;
}
return NULL;
};
Modified: hadoop/avro/trunk/lang/c/src/schema.h
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/schema.h?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/schema.h (original)
+++ hadoop/avro/trunk/lang/c/src/schema.h Fri Jan 22 00:38:01 2010
@@ -33,6 +33,7 @@
struct avro_record_schema_t {
struct avro_obj_t obj;
char *name;
+ /* TODO: st_table of names for faster lookup on record_read() */
STAILQ_HEAD(fields, avro_record_field_t) fields;
};
Modified: hadoop/avro/trunk/lang/c/tests/test_avro_data.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/test_avro_data.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/test_avro_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/test_avro_data.c Fri Jan 22 00:38:01 2010
@@ -46,7 +46,7 @@
fprintf(stderr, "Unable to encode/decode %s\n", type);
exit(EXIT_FAILURE);
}
-
+ avro_reader_dump(reader, stderr);
avro_datum_decref(datum_out);
avro_reader_free(reader);
avro_writer_free(writer);
@@ -104,7 +104,7 @@
return 0;
}
-static test_double(void)
+static int test_double(void)
{
int i;
for (i = 0; i < 100; i++) {
@@ -131,9 +131,9 @@
static int test_boolean(void)
{
int i;
- for (i = 0; i < 100; i++) {
+ for (i = 0; i <= 1; i++) {
avro_schema_t schema = avro_schema_boolean();
- avro_datum_t datum = avro_boolean(rand() % 2);
+ avro_datum_t datum = avro_boolean(i);
write_read_check(schema, NULL, datum, "boolean");
avro_datum_decref(datum);
}
@@ -149,15 +149,22 @@
return 0;
}
-int test_record(void)
+static int test_record(void)
{
- /*
- * TODO
- */
+ avro_schema_t schema = avro_schema_record("person");
+ avro_datum_t datum = avro_record("person");
+
+ avro_schema_record_field_append(schema, "name", avro_schema_string());
+ avro_schema_record_field_append(schema, "age", avro_schema_int());
+
+ avro_record_field_set(datum, "name", avro_string("Joseph Campbell"));
+ avro_record_field_set(datum, "age", avro_int(83));
+
+ write_read_check(schema, NULL, datum, "record");
return 0;
}
-int test_enum(void)
+static int test_enum(void)
{
/*
* TODO
@@ -165,7 +172,7 @@
return 0;
}
-int test_array(void)
+static int test_array(void)
{
int i, rval;
avro_schema_t schema = avro_schema_array(avro_schema_int());
@@ -182,7 +189,7 @@
return 0;
}
-int test_map(void)
+static int test_map(void)
{
avro_schema_t schema = avro_schema_map(avro_schema_long());
avro_datum_t datum = avro_map();
@@ -198,7 +205,7 @@
return 0;
}
-int test_union(void)
+static int test_union(void)
{
/*
* TODO
@@ -206,7 +213,7 @@
return 0;
}
-int test_fixed(void)
+static int test_fixed(void)
{
/*
* TODO
@@ -240,12 +247,10 @@
srandom(time(NULL));
for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
struct avro_tests *test = tests + i;
- fprintf(stderr, "Running %s tests...\n", test->name);
+ fprintf(stderr, "**** Running %s tests ****\n", test->name);
if (test->func() != 0) {
- fprintf(stderr, "failed!\n");
return EXIT_FAILURE;
}
- fprintf(stderr, "\t... %s tests passed!\n", test->name);
}
return EXIT_SUCCESS;
}
Modified: hadoop/avro/trunk/lang/c/version.sh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/version.sh?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/version.sh (original)
+++ hadoop/avro/trunk/lang/c/version.sh Fri Jan 22 00:38:01 2010
@@ -18,9 +18,9 @@
# libavro_binary_age = 0
# libavro_interface_age = 0
#
-libavro_micro_version=6
+libavro_micro_version=7
libavro_interface_age=0
-libavro_binary_age=1
+libavro_binary_age=2
# IGNORE EVERYTHING ELSE FROM HERE DOWN.........
if test $# != 1; then