You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by re...@apache.org on 2019/12/20 14:48:33 UTC
[uima-uimaj] 01/01: [UIMA-6162] Concurrent binary serialization
produces corrupt output
This is an automated email from the ASF dual-hosted git repository.
rec pushed a commit to branch bugfix/UIMA-6162-Concurrent-binary-serialization-produces-corrupt-output
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git
commit e3cd9827a1424caeda481588f836d3a64e63816b
Author: Richard Eckart de Castilho <re...@apache.org>
AuthorDate: Fri Dec 20 15:48:21 2019 +0100
[UIMA-6162] Concurrent binary serialization produces corrupt output
- Unit test which triggers the concurrent serialization data corruption situation
---
.../impl/ConcurrentBinarySerializationTest.java | 122 +++++++++++++++++++++
1 file changed, 122 insertions(+)
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/impl/ConcurrentBinarySerializationTest.java b/uimaj-core/src/test/java/org/apache/uima/cas/impl/ConcurrentBinarySerializationTest.java
new file mode 100644
index 0000000..eaadfdd
--- /dev/null
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/impl/ConcurrentBinarySerializationTest.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.cas.impl;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static org.apache.uima.cas.SerialFormat.*;
+import static org.apache.uima.util.CasCreationUtils.createCas;
+import static org.apache.uima.util.CasIOUtils.load;
+import static org.apache.uima.util.CasIOUtils.save;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.SerialFormat;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.junit.Test;
+
+public class ConcurrentBinarySerializationTest
+{
+  /**
+   * Serialization of the CAS is not inherently thread-safe. This test runs multiple
+   * serializations of one shared CAS in parallel to trigger a situation where an invalid
+   * serialization is generated - to be found by deserializing again.
+   */
+  @Test
+  public void thatConcurrentSerializationWorks() throws Exception
+  {
+    final SerialFormat[] formats = { BINARY_TSI, SERIALIZED_TSI, COMPRESSED_FILTERED_TSI,
+        COMPRESSED_TSI };
+    final int typeCount = 10;
+
+    // Set up a couple of custom types
+    TypeSystemDescription tsd = UIMAFramework.getResourceSpecifierFactory()
+        .createTypeSystemDescription();
+    for (int n = 0; n < typeCount; n++) {
+      tsd.addType("Type" + n, "", CAS.TYPE_NAME_ANNOTATION);
+    }
+
+    CAS cas = createCas(tsd, null, null);
+    cas.setDocumentText("This is a test.");
+
+    Random rnd = new Random();
+
+    // Set up a couple of random annotations
+    for (int i = 0; i < 1000; i++) {
+      String type = "Type" + rnd.nextInt(typeCount);
+      int a = rnd.nextInt(cas.getDocumentText().length());
+      int b = rnd.nextInt(cas.getDocumentText().length());
+      AnnotationFS ann = cas.createAnnotation(cas.getTypeSystem().getType(type), min(a, b),
+          max(a, b));
+      cas.addFsToIndexes(ann);
+    }
+
+    // Schedulable task which serializes a CAS and then deserializes it again to test that it
+    // was serialized correctly. We randomly alternate between different binary serialization
+    // formats which all include type system information.
+    Callable<Boolean> casSerDeser = () -> {
+      try {
+        SerialFormat fmt = formats[rnd.nextInt(formats.length)];
+        System.out.printf("Serializing as %s...%n", fmt);
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        save(cas, bos, fmt);
+
+        System.out.printf("Deserializing...%n");
+        CAS outCas = createCas((TypeSystemDescription) null, null, null);
+        load(new ByteArrayInputStream(bos.toByteArray()), outCas);
+      }
+      catch (Exception e) {
+        // Report the exception itself: getMessage() can be null and hides the exception type
+        System.out.printf("Failure: %s%n", e);
+        return false;
+      }
+      return true;
+    };
+
+    // Schedule concurrent serializations; the pool is shut down even if an assertion fails
+    ExecutorService executor = Executors.newFixedThreadPool(4);
+    try {
+      List<Future<Boolean>> results = new ArrayList<>();
+      for (int n = 0; n < 100; n++) {
+        results.add(executor.submit(casSerDeser));
+      }
+
+      // Every task must report a successful round-trip; unexpected errors propagate as-is
+      for (Future<Boolean> result : results) {
+        assertTrue("Concurrent serialization produced corrupt output", result.get());
+      }
+    }
+    finally {
+      executor.shutdownNow();
+    }
+  }
+}