You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by dc...@apache.org on 2011/07/15 03:22:20 UTC

svn commit: r1146936 - in /avro/trunk: CHANGES.txt lang/c/src/CMakeLists.txt lang/c/src/Makefile.am lang/c/src/avrocat.c lang/c/src/avropipe.c

Author: dcreager
Date: Fri Jul 15 01:22:18 2011
New Revision: 1146936

URL: http://svn.apache.org/viewvc?rev=1146936&view=rev
Log:
AVRO-396. C: avrocat and avropipe commands

The new avrocat command prints the contents of an Avro file to stdout.
Each record appears on a separate line, and is formatted using the Avro
JSON encoding.

The new avropipe command prints the same contents, but the output format
is the same as if you had run the JSON encoding of the data through the
jsonpipe [1] command.  This provides a format that's more amenable to
processing using standard Unix tools, since individual scalar values (no
matter how deep the original record) appear on separate lines.

[1] https://github.com/dvxhouse/jsonpipe

Added:
    avro/trunk/lang/c/src/avrocat.c
    avro/trunk/lang/c/src/avropipe.c
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/c/src/CMakeLists.txt
    avro/trunk/lang/c/src/Makefile.am

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1146936&r1=1146935&r2=1146936&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Fri Jul 15 01:22:18 2011
@@ -31,6 +31,8 @@ Avro 1.6.0 (unreleased)
 
     AVRO-861. C: Remove dependency on stdbool.h. (dcreager)
 
+    AVRO-396. C: avrocat and avropipe commands (dcreager)
+
   BUG FIXES
 
     AVRO-845. setup.py uses Python2.7+ specific code

Modified: avro/trunk/lang/c/src/CMakeLists.txt
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/CMakeLists.txt?rev=1146936&r1=1146935&r2=1146936&view=diff
==============================================================================
--- avro/trunk/lang/c/src/CMakeLists.txt (original)
+++ avro/trunk/lang/c/src/CMakeLists.txt Fri Jul 15 01:22:18 2011
@@ -125,3 +125,11 @@ set(VERSION ${AVRO_VERSION})
 configure_file(avro-c.pc.in avro-c.pc)
 install(FILES ${CMAKE_CURRENT_BINARY_DIR}/avro-c.pc
         DESTINATION lib/pkgconfig)
+
+add_executable(avrocat avrocat.c)
+target_link_libraries(avrocat avro-static)
+install(TARGETS avrocat RUNTIME DESTINATION bin)
+
+add_executable(avropipe avropipe.c)
+target_link_libraries(avropipe avro-static)
+install(TARGETS avropipe RUNTIME DESTINATION bin)

Modified: avro/trunk/lang/c/src/Makefile.am
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/Makefile.am?rev=1146936&r1=1146935&r2=1146936&view=diff
==============================================================================
--- avro/trunk/lang/c/src/Makefile.am (original)
+++ avro/trunk/lang/c/src/Makefile.am Fri Jul 15 01:22:18 2011
@@ -3,6 +3,8 @@ AM_CPPFLAGS=-I$(top_srcdir)/jansson/src
 AM_CFLAGS=-Wall -Wextra -Wunused-parameter
 ACLOCAL_AMFLAGS=-I m4
 
+bin_PROGRAMS =
+
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = avro-c.pc
 
@@ -39,3 +41,11 @@ libavro_la_LDFLAGS = \
         -version-info $(LIBAVRO_VERSION) \
         -release $(VERSION) \
         -export-dynamic
+
+bin_PROGRAMS += avrocat
+avrocat_SOURCES = avrocat.c
+avrocat_LDADD = libavro.la
+
+bin_PROGRAMS += avropipe
+avropipe_SOURCES = avropipe.c
+avropipe_LDADD = libavro.la

Added: avro/trunk/lang/c/src/avrocat.c
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/avrocat.c?rev=1146936&view=auto
==============================================================================
--- avro/trunk/lang/c/src/avrocat.c (added)
+++ avro/trunk/lang/c/src/avrocat.c Fri Jul 15 01:22:18 2011
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avro.h"
+#include "avro_private.h"
+
+
+/*-- PROCESSING A FILE --*/
+
+/* Prints every datum in `filename` to stdout, one JSON line each.  Exits with
+ * status 1 if the file cannot be opened or a datum cannot be converted. */
+static void process_file(const char *filename)
+{
+	avro_file_reader_t  reader;
+	avro_datum_t  datum;
+	if (avro_file_reader(filename, &reader)) {
+		fprintf(stderr, "Error opening %s:\n  %s\n", filename, strerror(errno));
+		exit(1);
+	}
+	while (avro_file_reader_read(reader, NULL, &datum) == 0) {
+		char  *json = NULL;
+		if (avro_datum_to_json(datum, 1, &json)) {
+			fprintf(stderr, "Error converting datum to JSON.\n");
+			exit(1);
+		}
+		printf("%s\n", json);
+		free(json);
+		avro_datum_decref(datum);	/* drop the reference the read gave us */
+	}
+	avro_file_reader_close(reader);
+}
+
+
+/*-- MAIN PROGRAM --*/
+
+/* Writes the one-line command synopsis to stderr. */
+static void usage(void)
+{
+	fputs("Usage: avrocat <avro data file>\n", stderr);
+}
+
+
+/* Entry point: expects exactly one argument, the Avro data file to print.
+ * Any other argument count reports the problem and exits with status 1. */
+int main(int argc, char **argv)
+{
+	switch (argc) {
+		case 2:
+			break;
+		case 1:
+			fprintf(stderr, "Must provide an input file.\n");
+			usage();
+			exit(1);
+		default:
+			fprintf(stderr, "Can't read from multiple input files.\n");
+			usage();
+			exit(1);
+	}
+
+	process_file(argv[1]);
+	return 0;
+}

Added: avro/trunk/lang/c/src/avropipe.c
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/avropipe.c?rev=1146936&view=auto
==============================================================================
--- avro/trunk/lang/c/src/avropipe.c (added)
+++ avro/trunk/lang/c/src/avropipe.c Fri Jul 15 01:22:18 2011
@@ -0,0 +1,417 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avro.h"
+#include "avro_private.h"
+
+
+/* The path separator to use in the JSON output.  Defaults to "/"; main()
+ * points it at the argument of -s/--separator when that option is given. */
+static const char  *separator = "/";
+
+
+/*-- PROCESSING A FILE --*/
+
+/**
+ * Stores in dest the path of element `index` of the array whose own path
+ * is `prefix`: the prefix, then the separator, then the index in decimal.
+ */
+static void
+create_array_prefix(avro_raw_string_t *dest, const char *prefix, size_t index)
+{
+	static char  index_text[100];
+	snprintf(index_text, sizeof(index_text), "%zu", index);
+	avro_raw_string_set(dest, prefix);
+	avro_raw_string_append(dest, separator);
+	avro_raw_string_append(dest, index_text);
+}
+
+/**
+ * Stores in dest the path of member `key` of the object whose own path is
+ * `prefix`: the prefix, then the separator, then the key.  If the key itself
+ * contains the separator, prints an error to stderr and exits with status 1.
+ */
+static void
+create_object_prefix(avro_raw_string_t *dest, const char *prefix, const char *key)
+{
+	const char  *clash = strstr(key, separator);
+	if (clash != NULL) {
+		fprintf(stderr,
+			"Error: Element \"%s\" in object %s "
+			"contains the separator character.\n"
+			"Please use the --separator option to choose another.\n",
+			key, prefix);
+		exit(1);
+	}
+	avro_raw_string_set(dest, prefix);
+	avro_raw_string_append(dest, separator);
+	avro_raw_string_append(dest, key);
+}
+
+/* Prints the `size` bytes at buf to stdout as a double-quoted JSON string.
+ * The quote, the backslash and \b \f \n \r \t get two-character escapes, other
+ * printable bytes are copied as-is, and every remaining byte becomes \u00XX. */
+static void
+print_bytes_value(const char *buf, size_t size)
+{
+	size_t  i;
+	printf("\"");
+	for (i = 0; i < size; i++) {
+		/* <ctype.h> needs an unsigned char value; a negative char is undefined */
+		const unsigned char  ch = (unsigned char) buf[i];
+		switch (ch) {
+			case '"':  printf("\\\""); break;
+			case '\\': printf("\\\\"); break;
+			case '\b': printf("\\b"); break;
+			case '\f': printf("\\f"); break;
+			case '\n': printf("\\n"); break;
+			case '\r': printf("\\r"); break;
+			case '\t': printf("\\t"); break;
+			default:
+				if (isprint(ch)) {
+					printf("%c", ch);
+				} else {
+					printf("\\u00%02x", (unsigned int) ch);
+				}
+		}
+	}
+	printf("\"");
+}
+
+static void
+process_value(const char *prefix, avro_value_t *value);	/* defined below; the container helpers recurse into it */
+
+/* Prints an array: first the line "<prefix>\t[]", then every element in
+ * index order, each under the path built by create_array_prefix
+ * (prefix, separator, element index). */
+static void
+process_array(const char *prefix, avro_value_t *value)
+{
+	avro_raw_string_t  path;
+	size_t  count;
+	size_t  idx;
+
+	printf("%s\t[]\n", prefix);
+	avro_value_get_size(value, &count);
+	avro_raw_string_init(&path);
+	for (idx = 0; idx < count; idx++) {
+		avro_value_t  element;
+		avro_value_get_by_index(value, idx, &element, NULL);
+		create_array_prefix(&path, prefix, idx);
+		process_value(avro_raw_string_get(&path), &element);
+	}
+	avro_raw_string_done(&path);
+}
+
+/* Prints an enum as "<prefix>\t" followed by its symbol name as a quoted string. */
+static void
+process_enum(const char *prefix, avro_value_t *value)
+{
+	avro_schema_t  enum_schema = avro_value_get_schema(value);
+	int  ordinal;
+
+	avro_value_get_enum(value, &ordinal);
+	const char  *symbol = avro_schema_enum_get(enum_schema, ordinal);
+	printf("%s\t", prefix);
+	print_bytes_value(symbol, strlen(symbol));
+	printf("\n");
+}
+
+/* Prints a map: first the line "<prefix>\t{}", then every entry in index
+ * order, each under the path built by create_object_prefix (prefix,
+ * separator, entry key), which exits if a key contains the separator. */
+static void
+process_map(const char *prefix, avro_value_t *value)
+{
+	avro_raw_string_t  path;
+	size_t  count;
+	size_t  idx;
+
+	printf("%s\t{}\n", prefix);
+	avro_value_get_size(value, &count);
+	avro_raw_string_init(&path);
+	for (idx = 0; idx < count; idx++) {
+		avro_value_t  entry;
+		const char  *entry_key;
+		avro_value_get_by_index(value, idx, &entry, &entry_key);
+		create_object_prefix(&path, prefix, entry_key);
+		process_value(avro_raw_string_get(&path), &entry);
+	}
+	avro_raw_string_done(&path);
+}
+
+/* Prints a record: first the line "<prefix>\t{}", then every field in index
+ * order, each under the path built by create_object_prefix (prefix,
+ * separator, field name), which exits if a name contains the separator. */
+static void
+process_record(const char *prefix, avro_value_t *value)
+{
+	avro_raw_string_t  path;
+	size_t  count;
+	size_t  idx;
+
+	printf("%s\t{}\n", prefix);
+	avro_value_get_size(value, &count);
+	avro_raw_string_init(&path);
+	for (idx = 0; idx < count; idx++) {
+		avro_value_t  field;
+		const char  *name;
+		avro_value_get_by_index(value, idx, &field, &name);
+		create_object_prefix(&path, prefix, name);
+		process_value(avro_raw_string_get(&path), &field);
+	}
+	avro_raw_string_done(&path);
+}
+
+/* Prints a union.  A null branch is printed bare as "<prefix>\tnull".  Any
+ * other branch is wrapped in an object: the line "<prefix>\t{}", then the
+ * branch value under the path (prefix, separator, branch type name). */
+static void
+process_union(const char *prefix, avro_value_t *value)
+{
+	avro_value_t  branch;
+	avro_raw_string_t  branch_path;
+	avro_schema_t  union_schema;
+	avro_schema_t  branch_schema;
+	int  which;
+
+	avro_value_get_current_branch(value, &branch);
+	if (avro_value_get_type(&branch) == AVRO_NULL) {
+		/* null branches are not wrapped in a JSON object */
+		printf("%s\tnull\n", prefix);
+		return;
+	}
+
+	avro_value_get_discriminant(value, &which);
+	union_schema = avro_value_get_schema(value);
+	branch_schema = avro_schema_union_branch(union_schema, which);
+	avro_raw_string_init(&branch_path);
+	create_object_prefix(&branch_path, prefix, avro_schema_type_name(branch_schema));
+	printf("%s\t{}\n", prefix);
+	process_value(avro_raw_string_get(&branch_path), &branch);
+	avro_raw_string_done(&branch_path);
+}
+
+/* Prints `value` under the path `prefix`: scalars as one "<prefix>\t<value>"
+ * line, compound types via their process_* helper; exits on an unknown type. */
+static void
+process_value(const char *prefix, avro_value_t *value)
+{
+	avro_type_t  type = avro_value_get_type(value);
+	switch (type) {
+		case AVRO_BOOLEAN:
+		{
+			/* int rather than bool: the C library dropped <stdbool.h>
+			 * (AVRO-861).  NOTE(review): confirm the getter takes int *. */
+			int  val;
+			avro_value_get_boolean(value, &val);
+			printf("%s\t%s\n", prefix, val? "true": "false");
+			return;
+		}
+
+		case AVRO_BYTES:
+		{
+			const void  *buf;
+			size_t  size;
+			avro_value_get_bytes(value, &buf, &size);
+			printf("%s\t", prefix);
+			print_bytes_value(buf, size);
+			printf("\n");
+			return;
+		}
+
+		case AVRO_DOUBLE:
+		{
+			double  val;
+			avro_value_get_double(value, &val);
+			printf("%s\t%lf\n", prefix, val);
+			return;
+		}
+
+		case AVRO_FLOAT:
+		{
+			float  val;
+			avro_value_get_float(value, &val);
+			printf("%s\t%f\n", prefix, val);
+			return;
+		}
+
+		case AVRO_INT32:
+		{
+			int32_t  val;
+			avro_value_get_int(value, &val);
+			printf("%s\t%" PRId32 "\n", prefix, val);
+			return;
+		}
+
+		case AVRO_INT64:
+		{
+			int64_t  val;
+			avro_value_get_long(value, &val);
+			printf("%s\t%" PRId64 "\n", prefix, val);
+			return;
+		}
+
+		case AVRO_NULL:
+			avro_value_get_null(value);
+			printf("%s\tnull\n", prefix);
+			return;
+
+		case AVRO_STRING:
+		{
+			/* TODO: Convert the UTF-8 to the current
+			 * locale's character set */
+			const char  *buf;
+			size_t  size;
+			avro_value_get_string(value, &buf, &size);
+			printf("%s\t", prefix);
+			print_bytes_value(buf, size);
+			printf("\n");
+			return;
+		}
+
+		case AVRO_ARRAY:
+			process_array(prefix, value);
+			return;
+
+		case AVRO_ENUM:
+			process_enum(prefix, value);
+			return;
+
+		case AVRO_FIXED:
+		{
+			const void  *buf;
+			size_t  size;
+			avro_value_get_fixed(value, &buf, &size);
+			printf("%s\t", prefix);
+			print_bytes_value(buf, size);
+			printf("\n");
+			return;
+		}
+
+		case AVRO_MAP:
+			process_map(prefix, value);
+			return;
+
+		case AVRO_RECORD:
+			process_record(prefix, value);
+			return;
+
+		case AVRO_UNION:
+			process_union(prefix, value);
+			return;
+
+		default:
+			fprintf(stderr, "Unknown schema type\n");
+			exit(1);
+	}
+}
+
+/* Prints every datum of the Avro data file `filename`: first the root line
+ * "<separator>\t[]", then datum number N under the path "<separator>N".
+ * Exits with status 1 if the file cannot be opened. */
+static void
+process_file(const char *filename)
+{
+	avro_file_reader_t  reader;
+	avro_raw_string_t  path;
+	avro_datum_t  datum;
+	size_t  datum_no = 0;
+
+	if (avro_file_reader(filename, &reader)) {
+		fprintf(stderr, "Error opening %s:\n  %s\n",
+			filename, strerror(errno));
+		exit(1);
+	}
+
+	/* The whole file is presented as one top-level JSON array. */
+	printf("%s\t[]\n", separator);
+	avro_raw_string_init(&path);
+	while (avro_file_reader_read(reader, NULL, &datum) == 0) {
+		avro_value_t  datum_value;
+		avro_datum_as_value(&datum_value, datum);
+		create_array_prefix(&path, "", datum_no);
+		process_value(avro_raw_string_get(&path), &datum_value);
+		avro_value_done(&datum_value);
+		datum_no++;
+	}
+	avro_raw_string_done(&path);
+	avro_file_reader_close(reader);
+}
+
+
+/*-- MAIN PROGRAM --*/
+
+static struct option longopts[] = {
+	{ "separator", required_argument, NULL, 's' },	/* --separator <arg>; reported to main() as 's' */
+	{ NULL, 0, NULL, 0 }	/* end-of-table marker for getopt_long */
+};
+
+/* Writes the two-line command synopsis to stderr. */
+static void usage(void)
+{
+	fputs("Usage: avropipe [--separator=<separator>]\n"
+	      "                <avro data file>\n", stderr);
+}
+
+
+/* Entry point.  Accepts -s <sep> / --separator=<sep> to replace the default
+ * path separator, then requires exactly one remaining argument: the Avro data
+ * file to print.  An unrecognized option or a wrong number of file arguments
+ * prints a usage message and exits with status 1. */
+int main(int argc, char **argv)
+{
+	int  opt;
+	int  file_count;
+
+	while ((opt = getopt_long(argc, argv, "s:", longopts, NULL)) != -1) {
+		if (opt != 's') {
+			/* anything other than -s/--separator is a usage error */
+			usage();
+			exit(1);
+		}
+		separator = optarg;
+	}
+
+	/* Whatever follows the options must be exactly one file name. */
+	file_count = argc - optind;
+	if (file_count == 0) {
+		fprintf(stderr, "Must provide an input file.\n");
+		usage();
+		exit(1);
+	}
+
+	if (file_count != 1) {
+		fprintf(stderr, "Can't read from multiple input files.\n");
+		usage();
+		exit(1);
+	}
+
+	/* Process the data file */
+	process_file(argv[optind]);
+	return 0;
+}