You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by dk...@apache.org on 2018/12/10 18:56:45 UTC

[avro] branch master updated: AVRO-1167, AVRO-766: Fix c AVRO_LINK memory leaks (#217)

This is an automated email from the ASF dual-hosted git repository.

dkulp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new b597c3a  AVRO-1167, AVRO-766: Fix c AVRO_LINK memory leaks (#217)
b597c3a is described below

commit b597c3ab93ef1c6a37a9c1fa3584d86d20723784
Author: John Gill <jo...@users.noreply.github.com>
AuthorDate: Mon Dec 10 10:56:07 2018 -0800

    AVRO-1167, AVRO-766: Fix c AVRO_LINK memory leaks (#217)
    
    * AVRO-1167: Enhance avro_schema_copy for AVRO_LINK
    
    - Add hash of named schemas found during copy
    - Find saved named  schema for copy of AVRO_LINK
    
    * AVRO-766: Correct memory leaks in AVRO_LINK copy
    
    - Adds test cases for AVRO-766 & AVRO-1167
    - Corrects reference counting for avro_schema_copy
    
    * Enable TEST_AVRO_1167 in test_avro_766
    
    This ensures that both fixes work together and that no valgrind errors are produced from a recrusive schema.
---
 lang/c/src/schema.c           | 91 +++++++++++++++++++++++++++++++++----------
 lang/c/tests/CMakeLists.txt   |  2 +
 lang/c/tests/test_avro_1167.c | 84 +++++++++++++++++++++++++++++++++++++++
 lang/c/tests/test_avro_766.c  | 76 ++++++++++++++++++++++++++++++++++++
 4 files changed, 233 insertions(+), 20 deletions(-)

diff --git a/lang/c/src/schema.c b/lang/c/src/schema.c
index 4e1b6bd..c2b0b37 100644
--- a/lang/c/src/schema.c
+++ b/lang/c/src/schema.c
@@ -2,17 +2,17 @@
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to you under the Apache License, Version 2.0 
+ * The ASF licenses this file to you under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
- * 
+ *
  * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  * implied.  See the License for the specific language governing
- * permissions and limitations under the License. 
+ * permissions and limitations under the License.
  */
 
 #include "avro/allocation.h"
@@ -61,7 +61,7 @@ static int is_avro_id(const char *name)
 			}
 		}
 		/*
-		 * starts with [A-Za-z_] subsequent [A-Za-z0-9_] 
+		 * starts with [A-Za-z_] subsequent [A-Za-z0-9_]
 		 */
 		return 1;
 	}
@@ -199,7 +199,13 @@ static void avro_schema_free(avro_schema_t schema)
 		case AVRO_LINK:{
 				struct avro_link_schema_t *link;
 				link = avro_schema_to_link(schema);
-				avro_schema_decref(link->to);
+				/* Since we didn't increment the
+				 * reference count of the target
+				 * schema when we created the link, we
+				 * should not decrement the reference
+				 * count of the target schema when we
+				 * free the link.
+				 */
 				avro_freet(struct avro_link_schema_t, link);
 			}
 			break;
@@ -737,7 +743,19 @@ avro_schema_t avro_schema_link(avro_schema_t to)
 		avro_set_error("Cannot allocate new link schema");
 		return NULL;
 	}
-	link->to = avro_schema_incref(to);
+
+	/* Do not increment the reference count of target schema
+	 * pointed to by the AVRO_LINK. AVRO_LINKs are only valid
+	 * internal to a schema. The target schema pointed to by a
+	 * link will be valid as long as the top-level schema is
+	 * valid. Similarly, the link will be valid as long as the
+	 * top-level schema is valid. Therefore the validity of the
+	 * link ensures the validity of its target, and we don't need
+	 * an additional reference count on the target. This mechanism
+	 * of an implied validity also breaks reference count cycles
+	 * for recursive schemas, which result in memory leaks.
+	 */
+	link->to = to;
 	avro_schema_init(&link->obj, AVRO_LINK);
 	return &link->obj;
 }
@@ -817,7 +835,7 @@ avro_type_from_json_t(json_t *json, avro_type_t *type,
 		return EINVAL;
 	}
 	/*
-	 * TODO: gperf/re2c this 
+	 * TODO: gperf/re2c this
 	 */
 	if (strcmp(type_str, "string") == 0) {
 		*type = AVRO_STRING;
@@ -1274,7 +1292,7 @@ avro_schema_from_json_length(const char *jsontext, size_t length,
 	return avro_schema_from_json_root(root, schema);
 }
 
-avro_schema_t avro_schema_copy(avro_schema_t schema)
+avro_schema_t avro_schema_copy_root(avro_schema_t schema, st_table *named_schemas)
 {
 	long i;
 	avro_schema_t new_schema = NULL;
@@ -1291,7 +1309,7 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
 	case AVRO_BOOLEAN:
 	case AVRO_NULL:
 		/*
-		 * No need to copy primitives since they're static 
+		 * No need to copy primitives since they're static
 		 */
 		new_schema = schema;
 		break;
@@ -1303,6 +1321,10 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
 			new_schema =
 			    avro_schema_record(record_schema->name,
 					       record_schema->space);
+		    if (save_named_schemas(new_schema, named_schemas)) {
+   				avro_set_error("Cannot save enum schema");
+   				return NULL;
+   			}
 			for (i = 0; i < record_schema->fields->num_entries; i++) {
 				union {
 					st_data_t data;
@@ -1310,10 +1332,11 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
 				} val;
 				st_lookup(record_schema->fields, i, &val.data);
 				avro_schema_t type_copy =
-				    avro_schema_copy(val.field->type);
+				    avro_schema_copy_root(val.field->type, named_schemas);
 				avro_schema_record_field_append(new_schema,
 								val.field->name,
 								type_copy);
+				avro_schema_decref(type_copy);
 			}
 		}
 		break;
@@ -1324,6 +1347,10 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
 			    avro_schema_to_enum(schema);
 			new_schema = avro_schema_enum_ns(enum_schema->name,
 					enum_schema->space);
+			if (save_named_schemas(new_schema, named_schemas)) {
+				avro_set_error("Cannot save enum schema");
+				return NULL;
+			}
 			for (i = 0; i < enum_schema->symbols->num_entries; i++) {
 				union {
 					st_data_t data;
@@ -1344,6 +1371,10 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
 			    avro_schema_fixed_ns(fixed_schema->name,
 					         fixed_schema->space,
 					         fixed_schema->size);
+ 			if (save_named_schemas(new_schema, named_schemas)) {
+ 				avro_set_error("Cannot save fixed schema");
+ 				return NULL;
+ 			}
 		}
 		break;
 
@@ -1352,11 +1383,12 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
 			struct avro_map_schema_t *map_schema =
 			    avro_schema_to_map(schema);
 			avro_schema_t values_copy =
-			    avro_schema_copy(map_schema->values);
+			    avro_schema_copy_root(map_schema->values, named_schemas);
 			if (!values_copy) {
 				return NULL;
 			}
 			new_schema = avro_schema_map(values_copy);
+			avro_schema_decref(values_copy);
 		}
 		break;
 
@@ -1365,11 +1397,12 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
 			struct avro_array_schema_t *array_schema =
 			    avro_schema_to_array(schema);
 			avro_schema_t items_copy =
-			    avro_schema_copy(array_schema->items);
+			    avro_schema_copy_root(array_schema->items, named_schemas);
 			if (!items_copy) {
 				return NULL;
 			}
 			new_schema = avro_schema_array(items_copy);
+			avro_schema_decref(items_copy);
 		}
 		break;
 
@@ -1387,12 +1420,13 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
 					avro_schema_t schema;
 				} val;
 				st_lookup(union_schema->branches, i, &val.data);
-				schema_copy = avro_schema_copy(val.schema);
+				schema_copy = avro_schema_copy_root(val.schema, named_schemas);
 				if (avro_schema_union_append
 				    (new_schema, schema_copy)) {
 					avro_schema_decref(new_schema);
 					return NULL;
 				}
+				avro_schema_decref(schema_copy);
 			}
 		}
 		break;
@@ -1401,12 +1435,12 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
 		{
 			struct avro_link_schema_t *link_schema =
 			    avro_schema_to_link(schema);
-			/*
-			 * TODO: use an avro_schema_copy of to instead of pointing to
-			 * the same reference 
-			 */
-			avro_schema_incref(link_schema->to);
-			new_schema = avro_schema_link(link_schema->to);
+			avro_schema_t to;
+
+			to = find_named_schemas(avro_schema_name(link_schema->to),
+									avro_schema_namespace(link_schema->to),
+									named_schemas);
+			new_schema = avro_schema_link(to);
 		}
 		break;
 
@@ -1416,6 +1450,23 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
 	return new_schema;
 }
 
+avro_schema_t avro_schema_copy(avro_schema_t schema)
+{
+	avro_schema_t new_schema;
+	st_table *named_schemas;
+
+	named_schemas = st_init_strtable_with_size(DEFAULT_TABLE_SIZE);
+	if (!named_schemas) {
+		avro_set_error("Cannot allocate named schema map");
+		return NULL;
+	}
+
+	new_schema = avro_schema_copy_root(schema, named_schemas);
+	st_foreach(named_schemas, HASH_FUNCTION_CAST named_schema_free_foreach, 0);
+	st_free_table(named_schemas);
+	return new_schema;
+}
+
 avro_schema_t avro_schema_get_subschema(const avro_schema_t schema,
          const char *name)
 {
diff --git a/lang/c/tests/CMakeLists.txt b/lang/c/tests/CMakeLists.txt
index 1d533c6..589185c 100644
--- a/lang/c/tests/CMakeLists.txt
+++ b/lang/c/tests/CMakeLists.txt
@@ -48,12 +48,14 @@ add_avro_test(test_data_structures)
 add_avro_test(test_avro_schema)
 add_avro_test(test_avro_schema_names)
 add_avro_test(test_avro_values)
+add_avro_test(test_avro_766)
 add_avro_test(test_avro_968)
 add_avro_test(test_avro_984)
 add_avro_test(test_avro_1034)
 add_avro_test(test_avro_1084)
 add_avro_test(test_avro_1087)
 add_avro_test(test_avro_1165)
+add_avro_test(test_avro_1167)
 add_avro_test(test_avro_1237)
 add_avro_test(test_avro_1238)
 add_avro_test(test_avro_1279)
diff --git a/lang/c/tests/test_avro_1167.c b/lang/c/tests/test_avro_1167.c
new file mode 100644
index 0000000..869b37d
--- /dev/null
+++ b/lang/c/tests/test_avro_1167.c
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <avro.h>
+
+/* To see the AVRO-1167 memory leak, run this test program through
+ * valgrind.  The specific valgrind commandline to use from the
+ * avro-trunk/lang/c/tests directory is:
+ *    valgrind -v --track-origins=yes --leak-check=full
+ *          --show-reachable = yes ../build/tests/test_avro_1167
+ */
+
+int main(int argc, char **argv)
+{
+	const char  *json =
+		"{"
+		"  \"name\": \"repeated_subrecord_array\","
+		"  \"type\": \"record\","
+		"  \"fields\": ["
+		"    { \"name\": \"subrecord_one\","
+		"      \"type\": {"
+		"                  \"name\": \"SubrecordType\","
+		"                  \"type\": \"record\","
+		"                  \"fields\": ["
+		"                    { \"name\": \"x\", \"type\": \"int\" },"
+		"                    { \"name\": \"y\", \"type\": \"int\" }"
+		"                  ]"
+		"                }"
+		"    },"
+		"    { \"name\": \"subrecord_two\", \"type\": \"SubrecordType\" },"
+		"    { \"name\": \"subrecord_array\", \"type\": {"
+		"                                                 \"type\":\"array\","
+		"                                                 \"items\": \"SubrecordType\""
+		"                                               }"
+		"    }"
+		"  ]"
+		"}";
+
+	int rval;
+	avro_schema_t schema = NULL;
+	avro_schema_t schema_copy = NULL;
+	avro_schema_error_t error;
+
+	(void) argc;
+	(void) argv;
+
+	rval = avro_schema_from_json(json, strlen(json), &schema, &error);
+	if ( rval )
+	{
+		printf("Failed to read schema from JSON.\n");
+		exit(EXIT_FAILURE);
+	}
+	else
+	{
+		printf("Successfully read schema from JSON.\n");
+	}
+
+	schema_copy = avro_schema_copy( schema );
+	if ( ! avro_schema_equal(schema, schema_copy) )
+	{
+		printf("Failed avro_schema_equal(schema, schema_copy)\n");
+		exit(EXIT_FAILURE);
+	}
+
+	avro_schema_decref(schema);
+	avro_schema_decref(schema_copy);
+	return 0;
+}
diff --git a/lang/c/tests/test_avro_766.c b/lang/c/tests/test_avro_766.c
new file mode 100755
index 0000000..4e21368
--- /dev/null
+++ b/lang/c/tests/test_avro_766.c
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <avro.h>
+
+/* To see the AVRO-766 memory leak, run this test program through
+ * valgrind.  The specific valgrind commandline to use from the
+ * avro-trunk/lang/c/tests directory is:
+ *    valgrind -v --track-origins=yes --leak-check=full
+ *          --show-reachable = yes ../build/tests/test_avro_766
+ */
+int main(int argc, char **argv)
+{
+	const char  *json =
+		"{"
+		"  \"type\": \"record\","
+		"  \"name\": \"list\","
+		"  \"fields\": ["
+		"    { \"name\": \"x\", \"type\": \"int\" },"
+		"    { \"name\": \"y\", \"type\": \"int\" },"
+		"    { \"name\": \"next\", \"type\": [\"null\",\"list\"]},"
+		"    { \"name\": \"arraylist\", \"type\": { \"type\":\"array\", \"items\": \"list\" } }"
+		"  ]"
+		"}";
+
+	int rval;
+	avro_schema_t schema = NULL;
+	avro_schema_error_t error;
+
+	(void) argc;
+	(void) argv;
+
+	rval = avro_schema_from_json(json, strlen(json), &schema, &error);
+	if ( rval )
+	{
+		printf("Failed to read schema from JSON.\n");
+		exit(EXIT_FAILURE);
+	}
+	else
+	{
+		printf("Successfully read schema from JSON.\n");
+	}
+
+#define TEST_AVRO_1167 (1)
+       #if TEST_AVRO_1167
+	{
+		avro_schema_t schema_copy = NULL;
+		schema_copy = avro_schema_copy( schema );
+		if ( ! avro_schema_equal(schema, schema_copy) )
+		{
+			printf("Failed avro_schema_equal(schema, schema_copy)\n");
+			exit(EXIT_FAILURE);
+		}
+		avro_schema_decref(schema_copy);
+	}
+       #endif
+
+	avro_schema_decref(schema);
+	return 0;
+}