You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ma...@apache.org on 2010/03/04 03:15:53 UTC
svn commit: r918818 - in /hadoop/avro/trunk: CHANGES.txt
lang/c/src/Makefile.am lang/c/src/avro.h lang/c/src/datum_size.c
lang/c/src/encoding.h lang/c/src/encoding_binary.c
lang/c/tests/test_avro_data.c lang/c/version.sh
Author: massie
Date: Thu Mar 4 02:15:53 2010
New Revision: 918818
URL: http://svn.apache.org/viewvc?rev=918818&view=rev
Log:
AVRO-445. avro_size_data() to pre-calculate the size of an avro_datum_t in serialized form. Contributed by Bruce Mitchener.
Added:
hadoop/avro/trunk/lang/c/src/datum_size.c
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/lang/c/src/Makefile.am
hadoop/avro/trunk/lang/c/src/avro.h
hadoop/avro/trunk/lang/c/src/encoding.h
hadoop/avro/trunk/lang/c/src/encoding_binary.c
hadoop/avro/trunk/lang/c/tests/test_avro_data.c
hadoop/avro/trunk/lang/c/version.sh
Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Thu Mar 4 02:15:53 2010
@@ -16,6 +16,9 @@
AVRO-438. Clarify spec. (Amichai Rothman via cutting)
+ AVRO-445. avro_size_data() to pre-calculate the size of an
+ avro_datum_t in serialized form (Bruce Mitchener via massie)
+
BUG FIXES
AVRO-424. Fix the specification of the deflate codec.
Modified: hadoop/avro/trunk/lang/c/src/Makefile.am
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/Makefile.am?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/Makefile.am (original)
+++ hadoop/avro/trunk/lang/c/src/Makefile.am Thu Mar 4 02:15:53 2010
@@ -7,7 +7,7 @@
lib_LTLIBRARIES = libavro.la
libavro_la_SOURCES = st.c st.h schema.c schema.h schema_equal.c \
-datum.c datum_equal.c datum_validate.c datum_read.c datum_skip.c datum_write.c datum.h \
+datum.c datum_equal.c datum_validate.c datum_read.c datum_skip.c datum_write.c datum_size.c datum.h \
io.c dump.c dump.h encoding_binary.c \
avro_private.h encoding.h datafile.c
libavro_la_LIBADD = $(top_builddir)/jansson/src/.libs/libjansson.a
Modified: hadoop/avro/trunk/lang/c/src/avro.h
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/avro.h?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/avro.h (original)
+++ hadoop/avro/trunk/lang/c/src/avro.h Thu Mar 4 02:15:53 2010
@@ -257,6 +257,8 @@
int avro_skip_data(avro_reader_t reader, avro_schema_t writer_schema);
int avro_write_data(avro_writer_t writer,
avro_schema_t writer_schema, avro_datum_t datum);
+int64_t avro_size_data(avro_writer_t writer,
+ avro_schema_t writer_schema, avro_datum_t datum);
/* File object container */
typedef struct avro_file_reader_t *avro_file_reader_t;
Added: hadoop/avro/trunk/lang/c/src/datum_size.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_size.c?rev=918818&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_size.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_size.c Thu Mar 4 02:15:53 2010
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+#include <errno.h>
+#include <assert.h>
+#include <string.h>
+#include "schema.h"
+#include "datum.h"
+#include "encoding.h"
+
+#define size_check(rval, call) { rval = call; if(rval) return rval; }
+#define size_accum(rval, size, call) { rval = call; if (rval < 0) return rval; else size += rval; }
+
+static int64_t size_datum(avro_writer_t writer, const avro_encoding_t * enc,
+ avro_schema_t writers_schema, avro_datum_t datum);
+
+/*
+ * Compute the serialized size, in bytes, of a record datum.
+ *
+ * writer - passed through unchanged to the encoding's size callbacks
+ * enc    - encoding whose size_* callbacks do the per-value sizing
+ * schema - record schema, or NULL to size the fields in the order kept
+ *          by the datum itself (record->field_order)
+ * datum  - record datum to size
+ *
+ * Returns the sum of the field sizes, or a negative value on error.
+ */
+static int64_t
+size_record(avro_writer_t writer, const avro_encoding_t * enc,
+	    struct avro_record_schema_t *schema, avro_datum_t datum)
+{
+	int rval;
+	long i;
+	int64_t field_size;
+	int64_t size = 0;
+	avro_datum_t field_datum;
+
+	if (schema) {
+		for (i = 0; i < schema->fields->num_entries; i++) {
+			union {
+				st_data_t data;
+				struct avro_record_field_t *field;
+			} val;
+			/* A failed lookup would leave val unset. */
+			if (!st_lookup(schema->fields, i, &val.data)) {
+				return -EINVAL;
+			}
+			rval = avro_record_get(datum, val.field->name,
+					       &field_datum);
+			if (rval) {
+				/*
+				 * Callers treat only negative values as
+				 * errors, so never let a positive status be
+				 * mistaken for a byte count.
+				 * NOTE(review): assumes avro_record_get()
+				 * reports failure as a non-zero errno-style
+				 * code - confirm its sign convention.
+				 */
+				return rval < 0 ? rval : -rval;
+			}
+			/* Keep the full 64-bit result; do not narrow to int. */
+			field_size = size_datum(writer, enc, val.field->type,
+						field_datum);
+			if (field_size < 0) {
+				return field_size;
+			}
+			size += field_size;
+		}
+	} else {
+		/* No schema. Just size the fields of the record datum */
+		struct avro_record_datum_t *record =
+		    avro_datum_to_record(datum);
+		for (i = 0; i < record->field_order->num_entries; i++) {
+			union {
+				st_data_t data;
+				char *name;
+			} val;
+			if (!st_lookup(record->field_order, i, &val.data)) {
+				return -EINVAL;
+			}
+			rval = avro_record_get(datum, val.name, &field_datum);
+			if (rval) {
+				return rval < 0 ? rval : -rval;
+			}
+			field_size = size_datum(writer, enc, NULL, field_datum);
+			if (field_size < 0) {
+				return field_size;
+			}
+			size += field_size;
+		}
+	}
+	return size;
+}
+
+/*
+ * Size of an enum datum: the size of its value when passed to the
+ * encoding's size_long callback.  The enum schema is not consulted.
+ */
+static int64_t
+size_enum(avro_writer_t writer, const avro_encoding_t * enc,
+	  struct avro_enum_schema_t *enump, struct avro_enum_datum_t *datum)
+{
+	int64_t encoded_len;
+
+	encoded_len = enc->size_long(writer, datum->value);
+	return encoded_len;
+}
+
+/* State shared between size_map() and its st_foreach() callback. */
+struct size_map_args {
+	int64_t rval;		/* 0, or the negative error that stopped the walk */
+	int64_t size;		/* bytes accumulated for keys and values so far */
+	avro_writer_t writer;
+	const avro_encoding_t *enc;
+	avro_schema_t values_schema;	/* schema for map values, may be NULL */
+};
+
+/*
+ * st_foreach() callback: add the size of one key (as a string) and of its
+ * value datum to args->size.  On a negative result the error is recorded in
+ * args->rval and iteration is stopped.
+ */
+static int
+size_map_foreach(char *key, avro_datum_t datum, struct size_map_args *args)
+{
+	/* 64-bit so that large sizes are not truncated. */
+	int64_t rval = args->enc->size_string(args->writer, key);
+	if (rval < 0) {
+		args->rval = rval;
+		return ST_STOP;
+	}
+	args->size += rval;
+
+	rval = size_datum(args->writer, args->enc, args->values_schema, datum);
+	if (rval < 0) {
+		args->rval = rval;
+		return ST_STOP;
+	}
+	args->size += rval;
+
+	return ST_CONTINUE;
+}
+
+/*
+ * Compute the serialized size, in bytes, of a map datum.
+ *
+ * A non-empty map contributes the size of its entry count followed by every
+ * key/value pair; every map, empty or not, contributes the size of a
+ * terminating zero count.
+ *
+ * writers_schema may be NULL, in which case values are sized without a
+ * schema.  Returns the total size, or a negative value on error.
+ */
+static int64_t
+size_map(avro_writer_t writer, const avro_encoding_t * enc,
+	 struct avro_map_schema_t *writers_schema,
+	 struct avro_map_datum_t *datum)
+{
+	int64_t rval;
+	int64_t size = 0;
+	struct size_map_args args = { 0, 0, writer, enc,
+		writers_schema ? writers_schema->values : NULL
+	};
+
+	if (datum->map->num_entries) {
+		size_accum(rval, size,
+			   enc->size_long(writer, datum->map->num_entries));
+		st_foreach(datum->map, size_map_foreach, (st_data_t) & args);
+		/* Propagate a failure instead of returning a partial size. */
+		if (args.rval < 0) {
+			return args.rval;
+		}
+		size += args.size;
+	}
+	size_accum(rval, size, enc->size_long(writer, 0));
+	return size;
+}
+
+/*
+ * Compute the serialized size, in bytes, of an array datum.
+ *
+ * A non-empty array contributes the size of its element count followed by
+ * every element; every array, empty or not, contributes the size of a
+ * terminating zero count.
+ *
+ * schema may be NULL, in which case elements are sized without a schema.
+ * Returns the total size, or a negative value on error.
+ */
+static int64_t
+size_array(avro_writer_t writer, const avro_encoding_t * enc,
+	   struct avro_array_schema_t *schema, struct avro_array_datum_t *array)
+{
+	int64_t rval;		/* 64-bit: element sizes must not be truncated */
+	long i;
+	int64_t size = 0;
+
+	if (array->els->num_entries) {
+		size_accum(rval, size,
+			   enc->size_long(writer, array->els->num_entries));
+		for (i = 0; i < array->els->num_entries; i++) {
+			union {
+				st_data_t data;
+				avro_datum_t datum;
+			} val;
+			/* A failed lookup would leave val unset. */
+			if (!st_lookup(array->els, i, &val.data)) {
+				return -EINVAL;
+			}
+			size_accum(rval, size,
+				   size_datum(writer, enc,
+					      schema ? schema->items : NULL,
+					      val.datum));
+		}
+	}
+	size_accum(rval, size, enc->size_long(writer, 0));
+	return size;
+}
+
+/*
+ * Compute the serialized size, in bytes, of a union datum: the size of the
+ * discriminant plus the size of the selected branch value.
+ *
+ * When a union schema is supplied, the branch schema is looked up by
+ * discriminant and used to size the value; a missing branch yields -EINVAL.
+ * Without a schema the value is sized with a NULL schema.
+ *
+ * Returns the total size, or a negative value on error.
+ */
+static int64_t
+size_union(avro_writer_t writer, const avro_encoding_t * enc,
+	   struct avro_union_schema_t *schema,
+	   struct avro_union_datum_t *unionp)
+{
+	int64_t rval;		/* 64-bit: branch sizes must not be truncated */
+	int64_t size = 0;
+	avro_schema_t write_schema = NULL;
+
+	size_accum(rval, size, enc->size_long(writer, unionp->discriminant));
+	if (schema) {
+		union {
+			st_data_t data;
+			avro_schema_t schema;
+		} val;
+		if (!st_lookup
+		    (schema->branches, unionp->discriminant, &val.data)) {
+			return -EINVAL;
+		}
+		write_schema = val.schema;
+	}
+	size_accum(rval, size,
+		   size_datum(writer, enc, write_schema, unionp->value));
+	return size;
+}
+
+/*
+ * Dispatch on the datum's type and return its serialized size in bytes.
+ *
+ * writers_schema may be NULL.  When it is a link schema the link is followed
+ * first.  Integer datums are sized as float or double when the schema asks
+ * for that type, and float datums are sized as double likewise.  A datum of
+ * type AVRO_LINK, or of a type not listed below, yields 0.
+ *
+ * Negative results from the per-type helpers are passed straight through.
+ */
+static int64_t size_datum(avro_writer_t writer, const avro_encoding_t * enc,
+			  avro_schema_t writers_schema, avro_datum_t datum)
+{
+	/* Chase schema links until a non-link schema (or none) remains. */
+	while (is_avro_schema(writers_schema) && is_avro_link(writers_schema)) {
+		writers_schema = (avro_schema_to_link(writers_schema))->to;
+	}
+
+	switch (avro_typeof(datum)) {
+	case AVRO_NULL:
+		return enc->size_null(writer);
+
+	case AVRO_BOOLEAN:
+		return enc->size_boolean(writer,
+					 avro_datum_to_boolean(datum)->i);
+
+	case AVRO_STRING:
+		return enc->size_string(writer, avro_datum_to_string(datum)->s);
+
+	case AVRO_BYTES:
+		return enc->size_bytes(writer,
+				       avro_datum_to_bytes(datum)->bytes,
+				       avro_datum_to_bytes(datum)->size);
+
+	case AVRO_INT32:
+	case AVRO_INT64:{
+			int64_t number;
+
+			if (avro_typeof(datum) == AVRO_INT32) {
+				number = avro_datum_to_int32(datum)->i32;
+			} else {
+				number = avro_datum_to_int64(datum)->i64;
+			}
+			if (!is_avro_schema(writers_schema)) {
+				return enc->size_long(writer, number);
+			}
+			/* The schema may promote the integer. */
+			if (is_avro_float(writers_schema)) {
+				return enc->size_float(writer, (float)number);
+			}
+			if (is_avro_double(writers_schema)) {
+				return enc->size_double(writer,
+							(double)number);
+			}
+			return enc->size_long(writer, number);
+		}
+
+	case AVRO_FLOAT:{
+			float number = avro_datum_to_float(datum)->f;
+			int promote_to_double =
+			    is_avro_schema(writers_schema)
+			    && is_avro_double(writers_schema);
+
+			if (promote_to_double) {
+				return enc->size_double(writer, (double)number);
+			}
+			return enc->size_float(writer, number);
+		}
+
+	case AVRO_DOUBLE:
+		return enc->size_double(writer, avro_datum_to_double(datum)->d);
+
+	case AVRO_RECORD:
+		return size_record(writer, enc,
+				   avro_schema_to_record(writers_schema),
+				   datum);
+
+	case AVRO_ENUM:
+		return size_enum(writer, enc,
+				 avro_schema_to_enum(writers_schema),
+				 avro_datum_to_enum(datum));
+
+	case AVRO_FIXED:
+		/* Reported directly from the datum, not via the encoding. */
+		return avro_datum_to_fixed(datum)->size;
+
+	case AVRO_MAP:
+		return size_map(writer, enc,
+				avro_schema_to_map(writers_schema),
+				avro_datum_to_map(datum));
+
+	case AVRO_ARRAY:
+		return size_array(writer, enc,
+				  avro_schema_to_array(writers_schema),
+				  avro_datum_to_array(datum));
+
+	case AVRO_UNION:
+		return size_union(writer, enc,
+				  avro_schema_to_union(writers_schema),
+				  avro_datum_to_union(datum));
+
+	case AVRO_LINK:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Public entry point: return the size of `datum` as sized by the binary
+ * encoding's callbacks.
+ *
+ * Returns -EINVAL when writer is NULL, when datum is not a valid datum, or
+ * when a writer's schema is supplied and the datum fails validation against
+ * it.  Without a writer's schema no validation is performed.
+ */
+int64_t avro_size_data(avro_writer_t writer, avro_schema_t writers_schema,
+		       avro_datum_t datum)
+{
+	if (!writer) {
+		return -EINVAL;
+	}
+	if (!is_avro_datum(datum)) {
+		return -EINVAL;
+	}
+	/* Validation only makes sense when there is a schema to check. */
+	if (is_avro_schema(writers_schema)) {
+		if (!avro_schema_datum_validate(writers_schema, datum)) {
+			return -EINVAL;
+		}
+	}
+	return size_datum(writer, &avro_binary_encoding, writers_schema, datum);
+}
Modified: hadoop/avro/trunk/lang/c/src/encoding.h
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/encoding.h?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/encoding.h (original)
+++ hadoop/avro/trunk/lang/c/src/encoding.h Thu Mar 4 02:15:53 2010
@@ -29,6 +29,7 @@
int (*read_string) (avro_reader_t reader, char **s);
int (*skip_string) (avro_reader_t reader);
int (*write_string) (avro_writer_t writer, const char *s);
+ int64_t(*size_string) (avro_writer_t writer, const char *s);
/*
* bytes
*/
@@ -36,42 +37,50 @@
int (*skip_bytes) (avro_reader_t reader);
int (*write_bytes) (avro_writer_t writer,
const char *bytes, const int64_t len);
+ int64_t(*size_bytes) (avro_writer_t writer,
+ const char *bytes, const int64_t len);
/*
* int
*/
int (*read_int) (avro_reader_t reader, int32_t * i);
int (*skip_int) (avro_reader_t reader);
int (*write_int) (avro_writer_t writer, const int32_t i);
+ int64_t(*size_int) (avro_writer_t writer, const int32_t i);
/*
* long
*/
int (*read_long) (avro_reader_t reader, int64_t * l);
int (*skip_long) (avro_reader_t reader);
int (*write_long) (avro_writer_t writer, const int64_t l);
+ int64_t(*size_long) (avro_writer_t writer, const int64_t l);
/*
* float
*/
int (*read_float) (avro_reader_t reader, float *f);
int (*skip_float) (avro_reader_t reader);
int (*write_float) (avro_writer_t writer, const float f);
+ int64_t(*size_float) (avro_writer_t writer, const float f);
/*
* double
*/
int (*read_double) (avro_reader_t reader, double *d);
int (*skip_double) (avro_reader_t reader);
int (*write_double) (avro_writer_t writer, const double d);
+ int64_t(*size_double) (avro_writer_t writer, const double d);
/*
* boolean
*/
int (*read_boolean) (avro_reader_t reader, int8_t * b);
int (*skip_boolean) (avro_reader_t reader);
int (*write_boolean) (avro_writer_t writer, const int8_t b);
+ int64_t(*size_boolean) (avro_writer_t writer, const int8_t b);
/*
* null
*/
int (*read_null) (avro_reader_t reader);
int (*skip_null) (avro_reader_t reader);
int (*write_null) (avro_writer_t writer);
+ int64_t(*size_null) (avro_writer_t writer);
};
typedef struct avro_encoding_t avro_encoding_t;
Modified: hadoop/avro/trunk/lang/c/src/encoding_binary.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/encoding_binary.c?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/encoding_binary.c (original)
+++ hadoop/avro/trunk/lang/c/src/encoding_binary.c Thu Mar 4 02:15:53 2010
@@ -72,6 +72,18 @@
return 0;
}
+/*
+ * Number of bytes needed for `l` after zig-zag mapping, at 7 payload bits
+ * per byte.  The writer is not used.  The result is always at least 1.
+ */
+static int64_t size_long(avro_writer_t writer, int64_t l)
+{
+	/*
+	 * Zig-zag in unsigned arithmetic: left-shifting a negative signed
+	 * value is undefined, and right-shifting one is
+	 * implementation-defined.  The mask is all ones for negative input,
+	 * which is what an arithmetic (l >> 63) would have produced.
+	 */
+	uint64_t sign_mask = l < 0 ? ~(uint64_t) 0 : (uint64_t) 0;
+	uint64_t n = ((uint64_t) l << 1) ^ sign_mask;
+	int64_t len = 1;
+
+	while (n >= 0x80) {
+		len++;
+		n >>= 7;
+	}
+	return len;
+}
+
static int read_int(avro_reader_t reader, int32_t * i)
{
int64_t l;
@@ -97,6 +109,12 @@
return write_long(writer, l);
}
+/* Size of a 32-bit int: widen to 64 bits and defer to size_long(). */
+static int64_t size_int(avro_writer_t writer, const int32_t i)
+{
+	const int64_t widened = i;
+
+	return size_long(writer, widened);
+}
+
static int read_bytes(avro_reader_t reader, char **bytes, int64_t * len)
{
int rval = read_long(reader, len);
@@ -138,6 +156,12 @@
return 0;
}
+/*
+ * Size of a byte sequence: the size of its length as computed by
+ * size_long(), plus the `len` payload bytes.  The bytes are not inspected.
+ */
+static int64_t
+size_bytes(avro_writer_t writer, const char *bytes, const int64_t len)
+{
+	const int64_t prefix_len = size_long(writer, len);
+
+	return prefix_len + len;
+}
+
static int read_string(avro_reader_t reader, char **s)
{
int64_t len;
@@ -155,6 +179,12 @@
return write_bytes(writer, s, len);
}
+/*
+ * Size of a NUL-terminated string: sized as a byte sequence of strlen(s)
+ * bytes (the terminator is not counted).
+ */
+static int64_t size_string(avro_writer_t writer, const char *s)
+{
+	const int64_t payload_len = strlen(s);
+
+	return size_bytes(writer, s, payload_len);
+}
+
static int read_float(avro_reader_t reader, float *f)
{
#if WORDS_BIGENDIAN
@@ -205,6 +235,11 @@
return 0;
}
+/* Size of a float: always 4, whatever the writer or the value. */
+static int64_t size_float(avro_writer_t writer, const float f)
+{
+	const int64_t float_len = 4;
+
+	return float_len;
+}
+
static int read_double(avro_reader_t reader, double *d)
{
#if WORDS_BIGENDIAN
@@ -264,6 +299,11 @@
return 0;
}
+/* Size of a double: always 8, whatever the writer or the value. */
+static int64_t size_double(avro_writer_t writer, const double d)
+{
+	const int64_t double_len = 8;
+
+	return double_len;
+}
+
static int read_boolean(avro_reader_t reader, int8_t * b)
{
AVRO_READ(reader, b, 1);
@@ -282,6 +322,11 @@
return 0;
}
+/* Size of a boolean: always 1, whatever the writer or the value. */
+static int64_t size_boolean(avro_writer_t writer, const int8_t b)
+{
+	const int64_t boolean_len = 1;
+
+	return boolean_len;
+}
+
static int read_skip_null(avro_reader_t reader)
{
/*
@@ -298,6 +343,11 @@
return 0;
}
+/* Size of a null: always 0; the writer is not used. */
+static int64_t size_null(avro_writer_t writer)
+{
+	const int64_t null_len = 0;
+
+	return null_len;
+}
+
const avro_encoding_t avro_binary_encoding = {
.description = "BINARY FORMAT",
/*
@@ -306,46 +356,54 @@
.read_string = read_string,
.skip_string = skip_string,
.write_string = write_string,
+ .size_string = size_string,
/*
* bytes
*/
.read_bytes = read_bytes,
.skip_bytes = skip_bytes,
.write_bytes = write_bytes,
+ .size_bytes = size_bytes,
/*
* int
*/
.read_int = read_int,
.skip_int = skip_int,
.write_int = write_int,
+ .size_int = size_int,
/*
* long
*/
.read_long = read_long,
.skip_long = skip_long,
.write_long = write_long,
+ .size_long = size_long,
/*
* float
*/
.read_float = read_float,
.skip_float = skip_float,
.write_float = write_float,
+ .size_float = size_float,
/*
* double
*/
.read_double = read_double,
.skip_double = skip_double,
.write_double = write_double,
+ .size_double = size_double,
/*
* boolean
*/
.read_boolean = read_boolean,
.skip_boolean = skip_boolean,
.write_boolean = write_boolean,
+ .size_boolean = size_boolean,
/*
* null
*/
.read_null = read_skip_null,
.skip_null = read_skip_null,
- .write_null = write_null
+ .write_null = write_null,
+ .size_null = size_null
};
Modified: hadoop/avro/trunk/lang/c/tests/test_avro_data.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/test_avro_data.c?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/test_avro_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/test_avro_data.c Thu Mar 4 02:15:53 2010
@@ -67,6 +67,15 @@
type, validate);
exit(EXIT_FAILURE);
}
+ int64_t size =
+ avro_size_data(writer, validate ? writers_schema : NULL,
+ datum);
+ if (size != avro_writer_tell(writer)) {
+ fprintf(stderr,
+ "Unable to calculate size %s validate=%d (%lld != %lld)\n",
+ type, validate, size, avro_writer_tell(writer));
+ exit(EXIT_FAILURE);
+ }
if (avro_read_data
(reader, writers_schema, readers_schema, &datum_out)) {
fprintf(stderr, "Unable to read %s validate=%d\n", type,
Modified: hadoop/avro/trunk/lang/c/version.sh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/version.sh?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/version.sh (original)
+++ hadoop/avro/trunk/lang/c/version.sh Thu Mar 4 02:15:53 2010
@@ -18,9 +18,9 @@
# libavro_binary_age = 0
# libavro_interface_age = 0
#
-libavro_micro_version=18
+libavro_micro_version=19
libavro_interface_age=0
-libavro_binary_age=0
+libavro_binary_age=1
# IGNORE EVERYTHING ELSE FROM HERE DOWN.........
if test $# != 1; then