You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by re...@apache.org on 2020/03/08 18:28:23 UTC

[uima-uimaj] 01/01: [UIMA-6199] Unindexed document annotation added back to index during deserialization

This is an automated email from the ASF dual-hosted git repository.

rec pushed a commit to branch UIMA-6199-Unindexed-document-annotation-added-back-to-index-during-deserialization
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git

commit ab9731b5d24c6a14cd4b0d614508a04145e4d00a
Author: Richard Eckart de Castilho <re...@apache.org>
AuthorDate: Sun Mar 8 19:28:10 2020 +0100

    [UIMA-6199] Unindexed document annotation added back to index during deserialization
    
    - Added a unit test demonstrating the issue (currently failing of course)
---
 .../uima/util/CasIOUtilsAlwaysHoldOnTest.java      | 80 ++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/uimaj-core/src/test/java/org/apache/uima/util/CasIOUtilsAlwaysHoldOnTest.java b/uimaj-core/src/test/java/org/apache/uima/util/CasIOUtilsAlwaysHoldOnTest.java
new file mode 100644
index 0000000..4896d9d
--- /dev/null
+++ b/uimaj-core/src/test/java/org/apache/uima/util/CasIOUtilsAlwaysHoldOnTest.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.util;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.SerialFormat;
+import org.apache.uima.cas.impl.CASImpl;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.junit.Test;
+
+public class CasIOUtilsAlwaysHoldOnTest {
+  /** Verifies that an unindexed default document annotation stays unindexed after a round-trip. */
+  @Test
+  public void thatDocumentAnnotationIsNotResurrected() throws Exception {
+    // Must set this to true, otherwise the test will not fail. Setting it to true will cause
+    // FSes which are not in any index to still be serialized out. When reading this data back,
+    // UIMA will find the non-indexed DocumentAnnotation and add it back without checking whether
+    // it was actually indexed or not.
+    String previousValue = System.setProperty(CASImpl.ALWAYS_HOLD_ONTO_FSS, "true");
+    try {
+      String customDocAnnoTypeName = "org.apache.uima.testing.CustomDocumentAnnotation";
+      TypeSystemDescription tsd = UIMAFramework.getResourceSpecifierFactory().createTypeSystemDescription();
+      tsd.addType(customDocAnnoTypeName, "", CAS.TYPE_NAME_DOCUMENT_ANNOTATION);
+      CAS cas = CasCreationUtils.createCas(tsd, null, null);
+
+      // Initialize the default document annotation, then immediately remove it from the indexes.
+      FeatureStructure da = cas.getDocumentAnnotation();
+      assertIndexedDocumentAnnotationTypes(cas, CAS.TYPE_NAME_DOCUMENT_ANNOTATION);
+      cas.removeFsFromIndexes(da);
+
+      // Now add a new document annotation of our custom type
+      FeatureStructure cda = cas.createFS(cas.getTypeSystem().getType(customDocAnnoTypeName));
+      cas.addFsToIndexes(cda);
+      assertIndexedDocumentAnnotationTypes(cas, customDocAnnoTypeName);
+
+      // Serialize to a buffer, then deserialize from that buffer back into the same CAS
+      ByteArrayOutputStream bos = new ByteArrayOutputStream();
+      CasIOUtils.save(cas, bos, SerialFormat.SERIALIZED_TSI);
+      CasIOUtils.load(new ByteArrayInputStream(bos.toByteArray()), cas);
+      assertIndexedDocumentAnnotationTypes(cas, customDocAnnoTypeName);
+    } finally {
+      // Restore the previous setting so it does not leak into other tests in the same JVM.
+      if (previousValue == null) {
+        System.clearProperty(CASImpl.ALWAYS_HOLD_ONTO_FSS);
+      } else {
+        System.setProperty(CASImpl.ALWAYS_HOLD_ONTO_FSS, previousValue);
+      }
+    }
+  }
+
+  /** Asserts the type names of the FSes selected by the document annotation type, in order. */
+  private static void assertIndexedDocumentAnnotationTypes(CAS cas, String... expectedTypeNames) {
+    assertThat(cas.select(cas.getTypeSystem().getType(CAS.TYPE_NAME_DOCUMENT_ANNOTATION)).asList())
+        .extracting(fs -> fs.getType().getName())
+        .containsExactly(expectedTypeNames);
+  }
+}