You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ma...@apache.org on 2010/01/19 22:13:05 UTC

svn commit: r900960 - in /hadoop/avro/trunk: CHANGES.txt lang/c/src/avro.h lang/c/src/datum.c lang/c/src/datum.h lang/c/src/encoding_binary.c lang/c/src/schema.h lang/c/tests/Makefile.am lang/c/tests/test_avro_data.c lang/c/version.sh

Author: massie
Date: Tue Jan 19 21:13:05 2010
New Revision: 900960

URL: http://svn.apache.org/viewvc?rev=900960&view=rev
Log:
AVRO-352.  Incorrect binary encoding for strings and bytes

Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/lang/c/src/avro.h
    hadoop/avro/trunk/lang/c/src/datum.c
    hadoop/avro/trunk/lang/c/src/datum.h
    hadoop/avro/trunk/lang/c/src/encoding_binary.c
    hadoop/avro/trunk/lang/c/src/schema.h
    hadoop/avro/trunk/lang/c/tests/Makefile.am
    hadoop/avro/trunk/lang/c/tests/test_avro_data.c
    hadoop/avro/trunk/lang/c/version.sh

Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=900960&r1=900959&r2=900960&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue Jan 19 21:13:05 2010
@@ -333,6 +333,8 @@
  
     AVRO-347. Add the --unsafe flag to asciidoc in order to include source/header files (massie)
 
+    AVRO-352. Incorrect binary encoding for strings and bytes (massie)
+
 Avro 1.2.0 (14 October 2009)
 
   INCOMPATIBLE CHANGES

Modified: hadoop/avro/trunk/lang/c/src/avro.h
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/avro.h?rev=900960&r1=900959&r2=900960&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/avro.h (original)
+++ hadoop/avro/trunk/lang/c/src/avro.h Tue Jan 19 21:13:05 2010
@@ -194,10 +194,12 @@
 
   void avro_datum_print (avro_datum_t value, FILE * fp);
 
-  int schema_match (avro_schema_t writers_schema,
+  int avro_datum_equal (avro_datum_t a, avro_datum_t b);
+
+  int avro_schema_match (avro_schema_t writers_schema,
 		    avro_schema_t readers_schema);
 
-  int schema_datum_validate (avro_schema_t expected_schema,
+  int avro_schema_datum_validate (avro_schema_t expected_schema,
 			     avro_datum_t datum);
 
   int avro_read_data (avro_reader_t reader, avro_schema_t writer_schema,

Modified: hadoop/avro/trunk/lang/c/src/datum.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum.c?rev=900960&r1=900959&r2=900960&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum.c Tue Jan 19 21:13:05 2010
@@ -20,6 +20,7 @@
 #include <string.h>
 #include <errno.h>
 #include <limits.h>
+#include <assert.h>
 #include "avro.h"
 #include "schema.h"
 #include "datum.h"
@@ -33,6 +34,52 @@
   datum->refcount = 1;
 }
 
+int	/* 1 when a and b are datums of the same type holding equal values, otherwise 0 */
+avro_datum_equal (avro_datum_t a, avro_datum_t b)
+{
+  if (!(is_avro_datum (a) && is_avro_datum (b)))
+    {
+      return 0;
+    }
+  if (avro_typeof (a) != avro_typeof (b))	/* no cross-type comparison, e.g. int vs. long */
+    {
+      return 0;
+    }
+  switch (avro_typeof (a))
+    {
+    case AVRO_STRING:	/* compared with strcmp, i.e. as NUL-terminated strings */
+      return strcmp (avro_datum_to_string (a)->s,
+		     avro_datum_to_string (b)->s) == 0;
+    case AVRO_BYTES:	/* sizes must agree before the contents are compared */
+      return (avro_datum_to_bytes (a)->size == avro_datum_to_bytes (b)->size)
+	&& memcmp (avro_datum_to_bytes (a)->bytes,
+		   avro_datum_to_bytes (b)->bytes,
+		   avro_datum_to_bytes (a)->size) == 0;
+    case AVRO_INT:
+      return avro_datum_to_int (a)->i == avro_datum_to_int (b)->i;
+    case AVRO_LONG:
+      return avro_datum_to_long (a)->l == avro_datum_to_long (b)->l;
+    case AVRO_FLOAT:	/* exact ==, no tolerance */
+      return avro_datum_to_float (a)->f == avro_datum_to_float (b)->f;
+    case AVRO_DOUBLE:	/* exact ==, no tolerance */
+      return avro_datum_to_double (a)->d == avro_datum_to_double (b)->d;
+    case AVRO_BOOLEAN:
+      return avro_datum_to_boolean (a)->i == avro_datum_to_boolean (b)->i;
+    case AVRO_NULL:	/* null carries no value, so two nulls are always equal */
+      return 1;
+    case AVRO_RECORD:
+    case AVRO_ENUM:
+    case AVRO_FIXED:
+    case AVRO_MAP:
+    case AVRO_ARRAY:
+    case AVRO_UNION:
+    case AVRO_LINK:
+      /* TODO: these types are not compared yet; they always report "not equal" */
+      return 0;
+    }
+  return 0;	/* type not covered by any case above */
+}
+
 avro_datum_t
 avro_string (const char *str)
 {
@@ -49,7 +96,7 @@
 }
 
 avro_datum_t
-avro_bytes (const char *buf, int64_t len)
+avro_bytes (const char *bytes, int64_t size)
 {
   struct avro_bytes_datum_t *datum =
     malloc (sizeof (struct avro_bytes_datum_t));
@@ -57,14 +104,14 @@
     {
       return NULL;
     }
-  datum->buf = malloc (len);
-  if (!datum->buf)
+  datum->bytes = malloc (size);
+  if (!datum->bytes)
     {
       free (datum);
       return NULL;
     }
-  memcpy (datum->buf, buf, len);
-  datum->len = len;
+  memcpy (datum->bytes, bytes, size);
+  datum->size = size;
 
   avro_datum_init (&datum->obj, AVRO_BYTES);
   return &datum->obj;
@@ -282,7 +329,7 @@
 }
 
 int
-schema_match (avro_schema_t writers_schema, avro_schema_t readers_schema)
+avro_schema_match (avro_schema_t writers_schema, avro_schema_t readers_schema)
 {
   if (is_avro_union (writers_schema) || is_avro_union (readers_schema))
     {
@@ -408,7 +455,16 @@
   int rval = EINVAL;
   const avro_encoding_t *enc = &avro_binary_encoding;
 
-  if (!reader || !schema_match (writers_schema, readers_schema) || !datum)
+  if (!reader || !is_avro_schema (writers_schema) || !datum)
+    {
+      return EINVAL;
+    }
+
+  if (readers_schema == NULL)
+    {
+      readers_schema = writers_schema;
+    }
+  else if (!avro_schema_match (writers_schema, readers_schema))
     {
       return EINVAL;
     }
@@ -423,7 +479,7 @@
       for (branch = STAILQ_FIRST (&union_schema->branches);
 	   branch != NULL; branch = STAILQ_NEXT (branch, branches))
 	{
-	  if (schema_match (writers_schema, branch->schema))
+	  if (avro_schema_match (writers_schema, branch->schema))
 	    {
 	      return avro_read_data (reader, writers_schema, branch->schema,
 				     datum);
@@ -520,7 +576,9 @@
       break;
 
     case AVRO_LINK:
-      /* TODO */
+      rval =
+	avro_read_data (reader, (avro_schema_to_link (writers_schema))->to,
+			readers_schema, datum);
       break;
     }
 
@@ -537,7 +595,7 @@
 schema_map_validate_foreach (char *key, avro_datum_t datum,
 			     struct validate_st *vst)
 {
-  if (!schema_datum_validate (vst->expected_schema, datum))
+  if (!avro_schema_datum_validate (vst->expected_schema, datum))
     {
       vst->rval = 0;
       return ST_STOP;
@@ -546,150 +604,323 @@
 }
 
 int
-schema_datum_validate (avro_schema_t expected_schema, avro_datum_t datum)
+avro_schema_datum_validate (avro_schema_t expected_schema, avro_datum_t datum)
 {
   if (!is_avro_schema (expected_schema) || !is_avro_datum (datum))
     {
       return EINVAL;
     }
-  /* null */
-  if (is_avro_null (expected_schema) && is_avro_null (datum))
-    {
-      return 1;
-    }
-  /* boolean */
-  else if (is_avro_boolean (expected_schema) && is_avro_boolean (datum))
-    {
-      return 1;
-    }
-  /* string */
-  else if (is_avro_string (expected_schema) && is_avro_boolean (datum))
-    {
-      return 1;
-    }
-  /* bytes */
-  else if (is_avro_bytes (expected_schema) && is_avro_bytes (datum))
-    {
-      return 1;
-    }
-  /* int */
-  else if (is_avro_int (expected_schema)
-	   && (is_avro_int (datum)
-	       || (is_avro_long (datum)
-		   && (INT_MIN <= (avro_datum_to_long (datum))->l
-		       && (avro_datum_to_long (datum))->l <= INT_MAX))))
-    {
-      return 1;
-    }
-  /* long */
-  else if (is_avro_long (expected_schema)
-	   && (is_avro_int (datum) || is_avro_long (datum)))
-    {
-      return 1;
-    }
-  /* float or double */
-  else
-    if ((is_avro_float (expected_schema) || is_avro_double (expected_schema))
-	&& (is_avro_int (datum) || is_avro_long (datum)
-	    || is_avro_float (datum)))
-    {
-      return 1;
-    }
-  /* fixed */
-  else if (is_avro_fixed (expected_schema)
-	   && (is_avro_fixed (datum)
-	       && ((avro_schema_to_fixed (expected_schema))->size ==
-		   (avro_datum_to_fixed (datum))->size)))
-    {
-      return 1;
-    }
-  /* enum */
-  else if (is_avro_enum (expected_schema))
-    {
-      struct avro_enum_schema_t *enump =
-	avro_schema_to_enum (expected_schema);
-      struct avro_enum_symbol_t *symbol = STAILQ_FIRST (&enump->symbols);
-      while (symbol)
-	{
-	  if (!strcmp (symbol->symbol, (avro_datum_to_enum (datum))->symbol))
-	    {
-	      return 1;
-	    }
-	  symbol = STAILQ_NEXT (symbol, symbols);
-	}
-      return 0;
-    }
-  /* array */
-  else if (is_avro_array (expected_schema) && is_avro_array (datum))
-    {
-      struct avro_array_datum_t *array = avro_datum_to_array (datum);
-      struct avro_array_element_t *el = STAILQ_FIRST (&array->els);
-      while (el)
-	{
-	  if (!schema_datum_validate
-	      ((avro_schema_to_array (expected_schema))->items, el->datum))
-	    {
-	      return 0;
-	    }
-	  el = STAILQ_NEXT (el, els);
-	}
-      return 1;
-    }
-  /* map */
-  else if (is_avro_map (expected_schema) && is_avro_map (datum))
-    {
-      struct validate_st vst = { expected_schema, 1 };
-      st_foreach ((avro_datum_to_map (datum))->map,
-		  schema_map_validate_foreach, (st_data_t) & vst);
-      return vst.rval;
-    }
-  /* union */
-  else if (is_avro_union (expected_schema))
+
+  switch (avro_typeof (expected_schema))
     {
-      struct avro_union_schema_t *union_schema =
-	avro_schema_to_union (expected_schema);
-      struct avro_union_branch_t *branch;
+    case AVRO_NULL:
+      return is_avro_null (datum);
 
-      for (branch = STAILQ_FIRST (&union_schema->branches);
-	   branch != NULL; branch = STAILQ_NEXT (branch, branches))
+    case AVRO_BOOLEAN:
+      return is_avro_boolean (datum);
+
+    case AVRO_STRING:
+      return is_avro_string (datum);
+
+    case AVRO_BYTES:
+      return is_avro_bytes (datum);
+
+    case AVRO_INT:
+      return is_avro_int (datum)
+	|| (is_avro_long (datum)
+	    && (INT_MIN <= avro_datum_to_long (datum)->l
+		&& avro_datum_to_long (datum)->l <= INT_MAX));
+
+    case AVRO_LONG:
+      return is_avro_int (datum) || is_avro_long (datum);
+
+    case AVRO_FLOAT:
+      return is_avro_int (datum) || is_avro_long (datum)
+	|| is_avro_float (datum);
+
+    case AVRO_DOUBLE:
+      return is_avro_int (datum) || is_avro_long (datum)
+	|| is_avro_float (datum) || is_avro_double (datum);
+
+    case AVRO_FIXED:
+      return (is_avro_fixed (datum)
+	      && (avro_schema_to_fixed (expected_schema)->size ==
+		  avro_datum_to_fixed (datum)->size));
+
+    case AVRO_ENUM:
+      {
+	struct avro_enum_schema_t *enump =
+	  avro_schema_to_enum (expected_schema);
+	struct avro_enum_symbol_t *symbol = STAILQ_FIRST (&enump->symbols);
+	while (symbol)
+	  {
+	    if (!strcmp (symbol->symbol, avro_datum_to_enum (datum)->symbol))
+	      {
+		return 1;
+	      }
+	    symbol = STAILQ_NEXT (symbol, symbols);
+	  }
+	return 0;
+      }
+      break;
+
+    case AVRO_ARRAY:
+      {
+	if (is_avro_array (datum))
+	  {
+	    struct avro_array_datum_t *array = avro_datum_to_array (datum);
+	    struct avro_array_element_t *el = STAILQ_FIRST (&array->els);
+	    while (el)
+	      {
+		if (!avro_schema_datum_validate
+		    ((avro_schema_to_array (expected_schema))->items,
+		     el->datum))
+		  {
+		    return 0;
+		  }
+		el = STAILQ_NEXT (el, els);
+	      }
+	    return 1;
+	  }
+	return 0;
+      }
+      break;
+
+    case AVRO_MAP:
+      if (is_avro_map (datum))
 	{
-	  if (schema_datum_validate (branch->schema, datum))
-	    {
-	      return 1;
-	    }
+	  struct validate_st vst = { expected_schema, 1 };
+	  st_foreach (avro_datum_to_map (datum)->map,
+		      schema_map_validate_foreach, (st_data_t) & vst);
+	  return vst.rval;
 	}
-      return 0;
-    }
-  /* record */
-  else if (is_avro_record (expected_schema) && is_avro_record (datum))
-    {
-      struct avro_record_schema_t *record_schema =
-	avro_schema_to_record (expected_schema);
-      struct avro_record_field_t *field;
-      for (field = STAILQ_FIRST (&record_schema->fields);
-	   field != NULL; field = STAILQ_NEXT (field, fields))
+      break;
+
+    case AVRO_UNION:
+      {
+	struct avro_union_schema_t *union_schema =
+	  avro_schema_to_union (expected_schema);
+	struct avro_union_branch_t *branch;
+
+	for (branch = STAILQ_FIRST (&union_schema->branches);
+	     branch != NULL; branch = STAILQ_NEXT (branch, branches))
+	  {
+	    if (avro_schema_datum_validate (branch->schema, datum))
+	      {
+		return 1;
+	      }
+	  }
+	return 0;
+      }
+      break;
+
+    case AVRO_RECORD:
+      if (is_avro_record (datum))
 	{
-	  avro_datum_t field_datum =
-	    avro_record_field_get (datum, field->name);
-	  if (!field_datum)
+	  struct avro_record_schema_t *record_schema =
+	    avro_schema_to_record (expected_schema);
+	  struct avro_record_field_t *field;
+	  for (field = STAILQ_FIRST (&record_schema->fields);
+	       field != NULL; field = STAILQ_NEXT (field, fields))
 	    {
-	      /* TODO: check for default values */
-	      return 0;
-	    }
-	  if (!schema_datum_validate (field->type, field_datum))
-	    {
-	      return 0;
+	      avro_datum_t field_datum =
+		avro_record_field_get (datum, field->name);
+	      if (!field_datum)
+		{
+		  /* TODO: check for default values */
+		  return 0;
+		}
+	      if (!avro_schema_datum_validate (field->type, field_datum))
+		{
+		  return 0;
+		}
 	    }
+	  return 1;
 	}
-      return 1;
+      break;
+
+    case AVRO_LINK:
+      {
+	return
+	  avro_schema_datum_validate ((avro_schema_to_link (expected_schema))->to,
+				 datum);
+      }
+      break;
     }
   return 0;
 }
 
+static int
+write_record (avro_writer_t writer, const avro_encoding_t * enc,
+	      avro_schema_t writer_schema, avro_datum_t datum)
+{
+  /* TODO: record encoding is not implemented; this stub ignores its arguments and fails with EINVAL */
+  return EINVAL;
+}
+
+static int
+write_enum (avro_writer_t writer, const avro_encoding_t * enc,
+	    avro_schema_t writer_schema, avro_datum_t datum)
+{
+  /* TODO: enum encoding is not implemented; this stub ignores its arguments and fails with EINVAL */
+  return EINVAL;
+}
+
+static int
+write_fixed (avro_writer_t writer, const avro_encoding_t * enc,
+	     avro_schema_t writer_schema, avro_datum_t datum)
+{
+  /* TODO: fixed encoding is not implemented; this stub ignores its arguments and fails with EINVAL */
+  return EINVAL;
+}
+
+static int
+write_map (avro_writer_t writer, const avro_encoding_t * enc,
+	   avro_schema_t writer_schema, avro_datum_t datum)
+{
+  /* TODO: map encoding is not implemented; this stub ignores its arguments and fails with EINVAL */
+  return EINVAL;
+}
+
+static int
+write_array (avro_writer_t writer, const avro_encoding_t * enc,
+	     avro_schema_t writer_schema, avro_datum_t datum)
+{
+  /* TODO: array encoding is not implemented; this stub ignores its arguments and fails with EINVAL */
+  return EINVAL;
+}
+
 int
 avro_write_data (avro_writer_t writer, avro_schema_t writer_schema,
 		 avro_datum_t datum)
 {
-  /* TODO */
-  return 1;
+  const avro_encoding_t *enc = &avro_binary_encoding;	/* values are emitted through the binary encoding */
+  int rval = -1;	/* result of the encoder call; returned unchanged to the caller */
+
+  if (!(is_avro_schema (writer_schema) && is_avro_datum (datum)))
+    {
+      return EINVAL;
+    }
+  if (!avro_schema_datum_validate (writer_schema, datum))	/* refuse datums the schema does not admit */
+    {
+      return EINVAL;
+    }
+  switch (avro_typeof (writer_schema))	/* dispatch on the schema type, not the datum type */
+    {
+    case AVRO_NULL:
+      rval = enc->write_null (writer);
+      break;
+    case AVRO_BOOLEAN:
+      rval = enc->write_boolean (writer, avro_datum_to_boolean (datum)->i);
+      break;
+    case AVRO_STRING:
+      rval = enc->write_string (writer, avro_datum_to_string (datum)->s);
+      break;
+    case AVRO_BYTES:
+      rval = enc->write_bytes (writer, avro_datum_to_bytes (datum)->bytes,
+			       avro_datum_to_bytes (datum)->size);
+      break;
+    case AVRO_INT:	/* validation admits int datums and in-range long datums */
+      {
+	int32_t i = 0;	/* initialised so the value is defined even when asserts are compiled out */
+	if (is_avro_int (datum))
+	  {
+	    i = avro_datum_to_int (datum)->i;
+	  }
+	else if (is_avro_long (datum))
+	  {
+	    i = (int32_t) avro_datum_to_long (datum)->l;
+	  }
+	else
+	  {
+	    assert (0 && "Serious bug in schema validation code");
+	  }
+	rval = enc->write_int (writer, i);
+      }
+      break;
+    case AVRO_LONG:	/* validation admits int datums too, so widen them instead of reading them as long datums */
+      rval = enc->write_long (writer, is_avro_int (datum) ? (int64_t) avro_datum_to_int (datum)->i : avro_datum_to_long (datum)->l);
+      break;
+    case AVRO_FLOAT:	/* any admitted numeric datum is converted to float before writing */
+      {
+	float f = 0.0f;	/* initialised so the value is defined even when asserts are compiled out */
+	if (is_avro_int (datum))
+	  {
+	    f = (float) (avro_datum_to_int (datum)->i);
+	  }
+	else if (is_avro_long (datum))
+	  {
+	    f = (float) (avro_datum_to_long (datum)->l);
+	  }
+	else if (is_avro_float (datum))
+	  {
+	    f = avro_datum_to_float (datum)->f;
+	  }
+	else if (is_avro_double (datum))
+	  {
+	    f = (float) (avro_datum_to_double (datum)->d);
+	  }
+	else
+	  {
+	    assert (0 && "Serious bug in schema validation code");
+	  }
+	rval = enc->write_float (writer, f);
+      }
+      break;
+    case AVRO_DOUBLE:	/* any admitted numeric datum is converted to double before writing */
+      {
+	double d = 0.0;	/* initialised so the value is defined even when asserts are compiled out */
+	if (is_avro_int (datum))
+	  {
+	    d = (double) (avro_datum_to_int (datum)->i);
+	  }
+	else if (is_avro_long (datum))
+	  {
+	    d = (double) (avro_datum_to_long (datum)->l);
+	  }
+	else if (is_avro_float (datum))
+	  {
+	    d = (double) (avro_datum_to_float (datum)->f);
+	  }
+	else if (is_avro_double (datum))
+	  {
+	    d = avro_datum_to_double (datum)->d;
+	  }
+	else
+	  {
+	    assert (0 && "Bug in schema validation code");
+	  }
+	rval = enc->write_double (writer, d);
+      }
+      break;
+
+    case AVRO_RECORD:	/* the compound-type helpers below are stubs that currently return EINVAL */
+      rval = write_record (writer, enc, writer_schema, datum);
+      break;
+    case AVRO_ENUM:
+      rval = write_enum (writer, enc, writer_schema, datum);
+      break;
+    case AVRO_FIXED:
+      rval = write_fixed (writer, enc, writer_schema, datum);
+      break;
+    case AVRO_MAP:
+      rval = write_map (writer, enc, writer_schema, datum);
+      break;
+    case AVRO_ARRAY:
+      rval = write_array (writer, enc, writer_schema, datum);
+      break;
+
+    case AVRO_UNION:	/* NOTE(review): union writing is not implemented; validation accepts unions, so this assert looks reachable */
+      {
+	assert (0 && "Bug in schema validation code");
+      }
+      break;
+
+    case AVRO_LINK:	/* a link is written as the schema it points to */
+      rval =
+	avro_write_data (writer, (avro_schema_to_link (writer_schema))->to,
+			 datum);
+      break;
+    }
+  return rval;
 }

Modified: hadoop/avro/trunk/lang/c/src/datum.h
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum.h?rev=900960&r1=900959&r2=900960&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum.h (original)
+++ hadoop/avro/trunk/lang/c/src/datum.h Tue Jan 19 21:13:05 2010
@@ -33,8 +33,8 @@
 struct avro_bytes_datum_t
 {
   struct avro_obj_t obj;
-  char *buf;
-  size_t len;
+  char *bytes;
+  int64_t size;
 };
 
 struct avro_int_datum_t
@@ -107,17 +107,17 @@
     STAILQ_HEAD (els, avro_array_element_t) els;
 };
 
-#define avro_datum_to_string(datum_)    container_of(datum_, struct avro_string_datum_t, obj)
-#define avro_datum_to_bytes(datum_)     container_of(datum_, struct avro_bytes_datum_t, obj)
-#define avro_datum_to_int(datum_)       container_of(datum_, struct avro_int_datum_t, obj)
-#define avro_datum_to_long(datum_)      container_of(datum_, struct avro_long_datum_t, obj)
-#define avro_datum_to_float(datum_)     container_of(datum_, struct avro_float_datum_t, obj)
-#define avro_datum_to_double(datum_)    container_of(datum_, struct avro_double_datum_t, obj)
-#define avro_datum_to_boolean(datum_)   container_of(datum_, struct avro_boolean_datum_t, obj)
-#define avro_datum_to_fixed(datum_)     container_of(datum_, struct avro_fixed_datum_t, obj)
-#define avro_datum_to_map(datum_)       container_of(datum_, struct avro_map_datum_t, obj)
-#define avro_datum_to_record(datum_)    container_of(datum_, struct avro_record_datum_t, obj)
-#define avro_datum_to_enum(datum_)      container_of(datum_, struct avro_enum_datum_t, obj)
-#define avro_datum_to_array(datum_)     container_of(datum_, struct avro_array_datum_t, obj)
+#define avro_datum_to_string(datum_)    (container_of(datum_, struct avro_string_datum_t, obj))
+#define avro_datum_to_bytes(datum_)     (container_of(datum_, struct avro_bytes_datum_t, obj))
+#define avro_datum_to_int(datum_)       (container_of(datum_, struct avro_int_datum_t, obj))
+#define avro_datum_to_long(datum_)      (container_of(datum_, struct avro_long_datum_t, obj))
+#define avro_datum_to_float(datum_)     (container_of(datum_, struct avro_float_datum_t, obj))
+#define avro_datum_to_double(datum_)    (container_of(datum_, struct avro_double_datum_t, obj))
+#define avro_datum_to_boolean(datum_)   (container_of(datum_, struct avro_boolean_datum_t, obj))
+#define avro_datum_to_fixed(datum_)     (container_of(datum_, struct avro_fixed_datum_t, obj))
+#define avro_datum_to_map(datum_)       (container_of(datum_, struct avro_map_datum_t, obj))
+#define avro_datum_to_record(datum_)    (container_of(datum_, struct avro_record_datum_t, obj))
+#define avro_datum_to_enum(datum_)      (container_of(datum_, struct avro_enum_datum_t, obj))
+#define avro_datum_to_array(datum_)     (container_of(datum_, struct avro_array_datum_t, obj))
 
 #endif

Modified: hadoop/avro/trunk/lang/c/src/encoding_binary.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/encoding_binary.c?rev=900960&r1=900959&r2=900960&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/encoding_binary.c (original)
+++ hadoop/avro/trunk/lang/c/src/encoding_binary.c Tue Jan 19 21:13:05 2010
@@ -27,7 +27,7 @@
 static int
 read_long (avro_reader_t reader, int64_t * l)
 {
-  uint64_t n = 0;
+  uint64_t value = 0;
   uint8_t b;
   int offset = 0;
   do
@@ -38,11 +38,11 @@
 	  return EILSEQ;
 	}
       AVRO_READ (reader, &b, 1);
-      n |= (int64_t) (b & 0x7F) << (7 * offset);
+      value |= (int64_t) (b & 0x7F) << (7 * offset);
       ++offset;
     }
   while (b & 0x80);
-  *l = (n >> 1) ^ -(n & 1);
+  *l = ((value >> 1) ^ -(value & 1));
   return 0;
 }
 
@@ -69,7 +69,7 @@
 {
   uint8_t b;
   uint64_t n = (l << 1) ^ (l >> 63);
-  while ((n & ~0x7F) != 0)
+  while (n & ~0x7F)
     {
       b = ((((uint8_t) n) & 0x7F) | 0x80);
       AVRO_WRITE (writer, &b, 1);
@@ -123,7 +123,7 @@
     {
       return ENOMEM;
     }
-  *bytes[*len] = '\0';
+  (*bytes)[*len] = '\0';
   AVRO_READ (reader, *bytes, *len);
   return 0;
 }
@@ -144,10 +144,16 @@
 static int
 write_bytes (avro_writer_t writer, const char *bytes, const int64_t len)
 {
+  int rval;
   if (len < 0)
     {
       return EINVAL;
     }
+  rval = write_long (writer, len);
+  if (rval)
+    {
+      return rval;
+    }
   AVRO_WRITE (writer, (char *) bytes, len);
   return 0;
 }

Modified: hadoop/avro/trunk/lang/c/src/schema.h
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/schema.h?rev=900960&r1=900959&r2=900960&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/schema.h (original)
+++ hadoop/avro/trunk/lang/c/src/schema.h Tue Jan 19 21:13:05 2010
@@ -88,13 +88,13 @@
   avro_schema_t to;
 };
 
-#define avro_schema_to_record(schema_)  container_of(schema_, struct avro_record_schema_t, obj)
-#define avro_schema_to_enum(schema_)    container_of(schema_, struct avro_enum_schema_t, obj)
-#define avro_schema_to_array(schema_)   container_of(schema_, struct avro_array_schema_t, obj)
-#define avro_schema_to_map(schema_)     container_of(schema_, struct avro_map_schema_t, obj)
-#define avro_schema_to_union(schema_)   container_of(schema_, struct avro_union_schema_t, obj)
-#define avro_schema_to_fixed(schema_)   container_of(schema_, struct avro_fixed_schema_t, obj)
-#define avro_schema_to_link(schema_)    container_of(schema_, struct avro_link_schema_t, obj)
+#define avro_schema_to_record(schema_)  (container_of(schema_, struct avro_record_schema_t, obj))
+#define avro_schema_to_enum(schema_)    (container_of(schema_, struct avro_enum_schema_t, obj))
+#define avro_schema_to_array(schema_)   (container_of(schema_, struct avro_array_schema_t, obj))
+#define avro_schema_to_map(schema_)     (container_of(schema_, struct avro_map_schema_t, obj))
+#define avro_schema_to_union(schema_)   (container_of(schema_, struct avro_union_schema_t, obj))
+#define avro_schema_to_fixed(schema_)   (container_of(schema_, struct avro_fixed_schema_t, obj))
+#define avro_schema_to_link(schema_)    (container_of(schema_, struct avro_link_schema_t, obj))
 
 static inline avro_schema_t
 avro_schema_incref (avro_schema_t schema)

Modified: hadoop/avro/trunk/lang/c/tests/Makefile.am
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/Makefile.am?rev=900960&r1=900959&r2=900960&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/Makefile.am (original)
+++ hadoop/avro/trunk/lang/c/tests/Makefile.am Tue Jan 19 21:13:05 2010
@@ -5,7 +5,7 @@
 
 EXTRA_DIST=schema_tests
 
-check_PROGRAMS=test_avro_schema
+check_PROGRAMS=test_avro_schema test_avro_data
 
 test_LDADD=$(top_builddir)/src/libavro.la
 

Modified: hadoop/avro/trunk/lang/c/tests/test_avro_data.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/test_avro_data.c?rev=900960&r1=900959&r2=900960&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/test_avro_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/test_avro_data.c Tue Jan 19 21:13:05 2010
@@ -20,8 +20,10 @@
 #include <stdint.h>
 #include <time.h>
 #include <string.h>
+#include <assert.h>
 #include "avro.h"
 #include "dump.h"
+#include "datum.h"
 
 char buf[4096];
 
@@ -51,7 +53,6 @@
 test_string (void)
 {
   int i;
-  int status;
   const char *strings[] = { "Four score and seven years ago",
     "our father brought forth on this continent",
     "a new nation", "conceived in Liberty",
@@ -59,8 +60,38 @@
   };
   for (i = 0; i < sizeof (strings) / sizeof (strings[0]); i++)
     {
-      avro_binary_encode_to_memory (buf, sizeof (buf), avro_schema_string (),
-				    avro_string (strings[i]));
+      avro_reader_t reader;
+      avro_writer_t writer;
+      avro_schema_t writer_schema = avro_schema_string ();
+      avro_datum_t datum_in = avro_string (strings[i]);
+      avro_datum_t datum_out;
+
+      reader = avro_reader_memory (buf, sizeof (buf));
+      if (!reader)
+	{
+	  assert (0 && "Can't create a memory reader");
+	}
+      writer = avro_writer_memory (buf, sizeof (buf));
+      if (!writer)
+	{
+	  assert (0 && "Can't create a memory writer");
+	}
+      if (avro_write_data (writer, writer_schema, datum_in))
+	{
+	  assert (0 && "Can't write string");
+	}
+      if (avro_read_data (reader, writer_schema, NULL, &datum_out))
+	{
+	  assert (0 && "Can't read string");
+	}
+      if (!avro_datum_equal (datum_in, datum_out))
+	{
+	  assert (0 && "String didn't survive encoding/decoding");
+	}
+      avro_datum_decref (datum_in);
+      avro_datum_decref (datum_out);
+      avro_reader_free (reader);
+      avro_writer_free (writer);
     }
   return 0;
 }
@@ -68,136 +99,155 @@
 static int
 test_bytes (void)
 {
-  int status;
   char bytes[] = { 0xDE, 0xAD, 0xBE, 0xEF };
-  char *read_bytes;
+  avro_reader_t reader = avro_reader_memory (buf, sizeof (buf));
+  avro_writer_t writer = avro_writer_memory (buf, sizeof (buf));
+  avro_schema_t writer_schema = avro_schema_bytes ();
+  avro_datum_t datum_in = avro_bytes (bytes, sizeof (bytes));
+  avro_datum_t datum_out;
 
-#if 0
-  status = avro_write_bytes (encoder, bytes, sizeof (bytes));
-  if (status != 0)
+  if (avro_write_data (writer, writer_schema, datum_in))
     {
-      return status;
+      assert (0 && "Unable to write bytes");
     }
-  status = avro_read_bytes (decoder, pool, (void *) &read_bytes, &len);
-  if (status != 0)
+  if (avro_read_data (reader, writer_schema, NULL, &datum_out))
     {
-      return status;
+      assert (0 && "Unable to read bytes");
     }
-  if (len != sizeof (bytes))
+  if (!avro_datum_equal (datum_in, datum_out))
     {
-      return 1;
+      assert (0 && "Byte did not encode/decode correctly");
     }
-  if (memcmp (bytes, read_bytes, len))
+  avro_datum_decref (datum_in);
+  avro_datum_decref (datum_out);
+  avro_reader_free (reader);
+  avro_writer_free (writer);
+  return 0;
+}
+
+static int
+test_int_long (int long_test)
+{
+  int i;
+  for (i = 0; i < 100; i++)
     {
-      return 1;
+      avro_reader_t reader = avro_reader_memory (buf, sizeof (buf));
+      avro_writer_t writer = avro_writer_memory (buf, sizeof (buf));
+      avro_schema_t writer_schema =
+	long_test ? avro_schema_long () : avro_schema_int ();
+      avro_datum_t datum_in = long_test ? avro_long (rand ()) :
+	avro_int (rand ());
+      avro_datum_t datum_out;
+
+      if (avro_write_data (writer, writer_schema, datum_in))
+	{
+	  assert (0 && "Unable to write int/long");
+	}
+      if (avro_read_data (reader, writer_schema, NULL, &datum_out))
+	{
+	  assert (0 && "Unable to read int/long");
+	}
+      if (!avro_datum_equal (datum_in, datum_out))
+	{
+	  assert (0 && "Unable to encode/decode int/long");
+	}
+      avro_datum_decref (datum_in);
+      avro_datum_decref (datum_out);
+      avro_reader_free (reader);
+      avro_writer_free (writer);
     }
-#endif
   return 0;
 }
 
 static int
 test_int (void)
 {
-  return 0;
+  return test_int_long (0);
 }
 
 static int
 test_long (void)
 {
-  return 0;
+  return test_int_long (1);
 }
 
-static int
-test_float (void)
+static int
+test_float_double (int double_test)	/* round-trips random values; non-zero double_test selects double over float */
 {
-  int status;
-  float input, output;
   int i;
-  int const num_rand_tests = 25;
 
-#if 0
-  for (i = 0; i < num_rand_tests; i++)
+  for (i = 0; i < 100; i++)
     {
-      input = random () * 1.1;
-      status = avro_write_float (encoder, input);
-      if (status != 0)
+      avro_reader_t reader = avro_reader_memory (buf, sizeof (buf));	/* reader and writer share the global buf */
+      avro_writer_t writer = avro_writer_memory (buf, sizeof (buf));
+      avro_schema_t schema =
+	double_test ? avro_schema_double () : avro_schema_float ();
+      avro_datum_t datum_in =
+	double_test ? avro_double ((double) (rand ())) :
+	avro_float ((float) (rand ()));
+      avro_datum_t datum_out;
+
+      if (avro_write_data (writer, schema, datum_in))
 	{
-	  return status;
+	  assert (0 && "Unable to write float/double");
 	}
-      status = avro_read_float (decoder, &output);
-      if (status != 0)
+      if (avro_read_data (reader, schema, NULL, &datum_out))	/* NULL reader schema: read with the writer schema */
 	{
-	  return status;
+	  assert (0 && "Unable to read float/double");
 	}
-      if (input != output)
+      if (!avro_datum_equal (datum_in, datum_out))
 	{
-	  fprintf (stderr, "%f != %f\n", input, output);
-	  return 1;
+	  assert (0 && "Unable to encode/decode float/double");
 	}
+
+      avro_datum_decref (datum_in);
+      avro_datum_decref (datum_out);
+      avro_reader_free (reader);
+      avro_writer_free (writer);
     }
-#endif
   return 0;
 }
 
 static int
-test_double (void)
+test_float (void)
 {
-  int status;
-  double input, output;
-  int i;
-  int const num_rand_tests = 25;
+  return test_float_double (0);
+}
 
-#if 0
-  for (i = 0; i < num_rand_tests; i++)
-    {
-      input = random () * 1.1;
-      status = avro_write_double (encoder, input);
-      if (status != 0)
-	{
-	  return status;
-	}
-      status = avro_read_double (decoder, &output);
-      if (status != 0)
-	{
-	  return status;
-	}
-      if (input != output)
-	{
-	  fprintf (stderr, "%f != %f\n", input, output);
-	  return 1;
-	}
-    }
-#endif
-  return 0;
+static int
+test_double (void)
+{
+  return test_float_double (1);
 }
 
 static int
 test_boolean (void)
 {
-  int status;
-  int i, bool_in, bool_out;
-
-#if 0
-  for (i = 0; i < 2; i++)
+  int i;
+  for (i = 0; i < 1000; i++)
     {
-      bool_in = i;
-      status = avro_write_bool (encoder, bool_in);
-      if (status != 0)
+      avro_reader_t reader = avro_reader_memory (buf, sizeof (buf));
+      avro_writer_t writer = avro_writer_memory (buf, sizeof (buf));
+      avro_schema_t schema = avro_schema_boolean ();
+      avro_datum_t datum_in = avro_boolean (rand () % 2);
+      avro_datum_t datum_out;
+
+      if (avro_write_data (writer, schema, datum_in))
 	{
-	  return status;
+	  assert (0 && "Unable to write boolean");
 	}
-      status = avro_read_bool (decoder, &bool_out);
-      if (status != 0)
+      if (avro_read_data (reader, schema, schema, &datum_out))
 	{
-	  return status;
+	  assert (0 && "Unable to read boolean");
 	}
-      if (bool_in != bool_out)
+      if (!avro_datum_equal (datum_in, datum_out))
 	{
-	  fprintf (stderr, "%d != %d\n", bool_in, bool_out);
-	  return 1;
+	  assert (0 && "Unable to encode/decode boolean");
 	}
+
+      avro_reader_free (reader);
+      avro_writer_free (writer);
     }
-#endif
   return 0;
 }
 
@@ -210,36 +260,42 @@
 int
 test_record (void)
 {
+  /* TODO */
   return 0;
 }
 
 int
 test_enum (void)
 {
+  /* TODO */
   return 0;
 }
 
 int
 test_array (void)
 {
+  /* TODO */
   return 0;
 }
 
 int
 test_map (void)
 {
+  /* TODO */
   return 0;
 }
 
 int
 test_union (void)
 {
+  /* TODO */
   return 0;
 }
 
 int
 test_fixed (void)
 {
+  /* TODO */
   return 0;
 }
 

Modified: hadoop/avro/trunk/lang/c/version.sh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/version.sh?rev=900960&r1=900959&r2=900960&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/version.sh (original)
+++ hadoop/avro/trunk/lang/c/version.sh Tue Jan 19 21:13:05 2010
@@ -18,9 +18,9 @@
 #         libavro_binary_age = 0
 #         libavro_interface_age = 0
 #
-libavro_micro_version=4
+libavro_micro_version=5
 libavro_interface_age=0
-libavro_binary_age=1
+libavro_binary_age=0
 
 # IGNORE EVERYTHING ELSE FROM HERE DOWN.........
 if test $# != 1; then