You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ea...@apache.org on 2008/08/27 22:44:12 UTC
svn commit: r689609 [8/8] - in /incubator/uima/uimaj/trunk/uimaj-core/src:
main/java/org/apache/uima/cas/ main/java/org/apache/uima/cas/impl/
main/java/org/apache/uima/internal/util/ main/resources/org/apache/uima/
test/java/org/apache/uima/cas/impl/
Modified: incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasDeserializerTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasDeserializerTest.java?rev=689609&r1=689608&r2=689609&view=diff
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasDeserializerTest.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasDeserializerTest.java Wed Aug 27 13:44:11 2008
@@ -1,1029 +1,1595 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.uima.cas.impl;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.StringReader;
-import java.io.StringWriter;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Stack;
-
-import javax.xml.parsers.FactoryConfigurationError;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
-import junit.framework.TestCase;
-
-import org.apache.uima.UIMAFramework;
-import org.apache.uima.cas.ArrayFS;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.FSIterator;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.FeatureStructure;
-import org.apache.uima.cas.IntArrayFS;
-import org.apache.uima.cas.StringArrayFS;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
-import org.apache.uima.cas.impl.XmiSerializationSharedData.XmiArrayElement;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.cas_data.impl.CasComparer;
-import org.apache.uima.internal.util.XmlAttribute;
-import org.apache.uima.internal.util.XmlElementNameAndContents;
-import org.apache.uima.resource.metadata.FsIndexDescription;
-import org.apache.uima.resource.metadata.TypeDescription;
-import org.apache.uima.resource.metadata.TypeSystemDescription;
-import org.apache.uima.resource.metadata.impl.TypePriorities_impl;
-import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl;
-import org.apache.uima.test.junit_extension.JUnitExtension;
-import org.apache.uima.util.CasCopier;
-import org.apache.uima.util.CasCreationUtils;
-import org.apache.uima.util.FileUtils;
-import org.apache.uima.util.XMLInputSource;
-import org.apache.uima.util.XMLSerializer;
-import org.xml.sax.Attributes;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.XMLReader;
-import org.xml.sax.helpers.DefaultHandler;
-
-
-public class XmiCasDeserializerTest extends TestCase {
-
- private FsIndexDescription[] indexes;
-
- private TypeSystemDescription typeSystem;
-
- /**
- * Constructor for XCASDeserializerTest.
- *
- * @param arg0
- */
- public XmiCasDeserializerTest(String arg0) throws IOException {
- super(arg0);
- }
-
- protected void setUp() throws Exception {
- File typeSystemFile = JUnitExtension.getFile("ExampleCas/testTypeSystem.xml");
- File indexesFile = JUnitExtension.getFile("ExampleCas/testIndexes.xml");
-
- typeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
- new XMLInputSource(typeSystemFile));
- indexes = UIMAFramework.getXMLParser().parseFsIndexCollection(new XMLInputSource(indexesFile))
- .getFsIndexes();
- }
-
- public void testDeserializeAndReserialize() throws Exception {
- try {
- File tsWithNoMultiRefs = JUnitExtension.getFile("ExampleCas/testTypeSystem.xml");
- doTestDeserializeAndReserialize(tsWithNoMultiRefs,false);
- File tsWithMultiRefs = JUnitExtension.getFile("ExampleCas/testTypeSystem_withMultiRefs.xml");
- doTestDeserializeAndReserialize(tsWithMultiRefs,false);
- //also test with JCas initialized
- doTestDeserializeAndReserialize(tsWithNoMultiRefs,true);
- doTestDeserializeAndReserialize(tsWithMultiRefs,true);
- } catch (Exception e) {
- JUnitExtension.handleException(e);
- }
- }
-
- private void doTestDeserializeAndReserialize(File typeSystemDescriptorFile, boolean useJCas) throws Exception {
- // deserialize a complex CAS from XCAS
- TypeSystemDescription typeSystemDescription = UIMAFramework.getXMLParser().parseTypeSystemDescription(
- new XMLInputSource(typeSystemDescriptorFile));
- CAS cas = CasCreationUtils.createCas(typeSystemDescription, new TypePriorities_impl(), indexes);
- if (useJCas) {
- cas.getJCas();
- }
-
- InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
- XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
- ContentHandler deserHandler = deser.getXCASHandler(cas);
- SAXParserFactory fact = SAXParserFactory.newInstance();
- SAXParser parser = fact.newSAXParser();
- XMLReader xmlReader = parser.getXMLReader();
- xmlReader.setContentHandler(deserHandler);
- xmlReader.parse(new InputSource(serCasStream));
- serCasStream.close();
-
- // reserialize as XMI
- String xml = serialize(cas, null);
-// System.out.println(xml);
-
- // deserialize into another CAS
- CAS cas2 = CasCreationUtils.createCas(typeSystemDescription, new TypePriorities_impl(), indexes);
- if (useJCas) {
- cas2.getJCas();
- }
- XmiCasDeserializer deser2 = new XmiCasDeserializer(cas2.getTypeSystem());
- ContentHandler deserHandler2 = deser2.getXmiCasHandler(cas2);
- xmlReader.setContentHandler(deserHandler2);
- xmlReader.parse(new InputSource(new StringReader(xml)));
-
- // compare
- assertEquals(cas.getAnnotationIndex().size(), cas2.getAnnotationIndex().size());
- assertEquals(cas.getDocumentText(), cas2.getDocumentText());
- CasComparer.assertEquals(cas,cas2);
-
- // check that array refs are not null
- Type entityType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Entity");
- Feature classesFeat = entityType.getFeatureByBaseName("classes");
- Iterator iter = cas2.getIndexRepository().getIndex("testEntityIndex").iterator();
- assertTrue(iter.hasNext());
- while (iter.hasNext()) {
- FeatureStructure fs = (FeatureStructure) iter.next();
- StringArrayFS arrayFS = (StringArrayFS) fs.getFeatureValue(classesFeat);
- assertNotNull(arrayFS);
- for (int i = 0; i < arrayFS.size(); i++) {
- assertNotNull(arrayFS.get(i));
- }
- }
- Type annotArrayTestType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.AnnotationArrayTest");
- Feature annotArrayFeat = annotArrayTestType.getFeatureByBaseName("arrayOfAnnotations");
- Iterator iter2 = cas2.getAnnotationIndex(annotArrayTestType).iterator();
- assertTrue(iter2.hasNext());
- while (iter2.hasNext()) {
- FeatureStructure fs = (FeatureStructure) iter2.next();
- ArrayFS arrayFS = (ArrayFS) fs.getFeatureValue(annotArrayFeat);
- assertNotNull(arrayFS);
- for (int i = 0; i < arrayFS.size(); i++) {
- assertNotNull(arrayFS.get(i));
- }
- }
-
- // test that lenient mode does not report errors
- CAS cas3 = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
- new TypePriorities_impl(), new FsIndexDescription[0]);
- if (useJCas) {
- cas3.getJCas();
- }
- XmiCasDeserializer deser3 = new XmiCasDeserializer(cas3.getTypeSystem());
- ContentHandler deserHandler3 = deser3.getXmiCasHandler(cas3, true);
- xmlReader.setContentHandler(deserHandler3);
- xmlReader.parse(new InputSource(new StringReader(xml)));
- }
-
- public void testMultiThreadedSerialize() throws Exception {
- try {
- File tsWithNoMultiRefs = JUnitExtension.getFile("ExampleCas/testTypeSystem.xml");
- doTestMultiThreadedSerialize(tsWithNoMultiRefs);
- File tsWithMultiRefs = JUnitExtension.getFile("ExampleCas/testTypeSystem_withMultiRefs.xml");
- doTestMultiThreadedSerialize(tsWithMultiRefs);
- } catch (Exception e) {
- JUnitExtension.handleException(e);
- }
- }
-
- private static class DoSerialize implements Runnable{
- private CAS cas;
-
- DoSerialize(CAS aCas) {
- cas = aCas;
- }
-
- public void run() {
- try {
- serialize(cas, null);
-// serialize(cas, null);
-// serialize(cas, null);
-// serialize(cas, null);
- } catch (IOException e) {
-
- e.printStackTrace();
- } catch (SAXException e) {
-
- e.printStackTrace();
- }
- }
- }
-
- private static int MAX_THREADS = 16;
- // do as sequence 1, 2, 4, 8, 16 and measure elapsed time
- private static int [] threadsToUse = new int[] {1, 2, 4, 8, 16/*, 32, 64*/};
-
- private void doTestMultiThreadedSerialize(File typeSystemDescriptor) throws Exception {
- // deserialize a complex CAS from XCAS
- CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
-
- InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
- XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
- ContentHandler deserHandler = deser.getXCASHandler(cas);
- SAXParserFactory fact = SAXParserFactory.newInstance();
- SAXParser parser = fact.newSAXParser();
- XMLReader xmlReader = parser.getXMLReader();
- xmlReader.setContentHandler(deserHandler);
- xmlReader.parse(new InputSource(serCasStream));
- serCasStream.close();
-
- // make n copies of the cas, so they all share
- // the same type system
-
- final CAS [] cases = new CAS[MAX_THREADS];
-
- for (int i = 0; i < MAX_THREADS; i++) {
- cases[i] = CasCreationUtils.createCas(cas.getTypeSystem(), new TypePriorities_impl(), indexes, null);
- CasCopier.copyCas(cas, cases[i], true);
- }
-
- // start n threads, serializing as XMI
-
- for (int i = 0; i < threadsToUse.length; i++) {
- Thread [] threads = new Thread[MAX_THREADS];
- long startTime = System.currentTimeMillis();
- for (int ti = 0; ti < threadsToUse[i]; ti++) {
- threads[ti] = new Thread(new DoSerialize(cases[ti]));
-
- threads[ti].start();
- }
- for (int ti = 0; ti < threadsToUse[i]; ti++) {
- threads[ti].join();
- //System.out.print(" "+ ti);
- }
- System.out.println("\nNumber of threads serializing: " + threadsToUse[i] +
- " Normalized millisecs (should be close to the same): " + (System.currentTimeMillis() - startTime) / threadsToUse[i]);
- }
- }
-
-
- public void testMultipleSofas() throws Exception {
- try {
- CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
- new FsIndexDescription[0]);
- // set document text for the initial view
- cas.setDocumentText("This is a test");
- // create a new view and set its document text
- CAS cas2 = cas.createView("OtherSofa");
- cas2.setDocumentText("This is only a test");
-
- // create an annotation and add to index of both views
- AnnotationFS anAnnot = cas.createAnnotation(cas.getAnnotationType(), 0, 5);
- cas.getIndexRepository().addFS(anAnnot);
- cas2.getIndexRepository().addFS(anAnnot);
- FSIndex tIndex = cas.getAnnotationIndex();
- FSIndex t2Index = cas2.getAnnotationIndex();
- assertTrue(tIndex.size() == 2); // document annot and this one
- assertTrue(t2Index.size() == 2); // ditto
-
- // serialize
- StringWriter sw = new StringWriter();
- XMLSerializer xmlSer = new XMLSerializer(sw, false);
- XmiCasSerializer xmiSer = new XmiCasSerializer(cas.getTypeSystem());
- xmiSer.serialize(cas, xmlSer.getContentHandler());
- String xml = sw.getBuffer().toString();
-
- // deserialize into another CAS (repeat twice to check it still works after reset)
- CAS newCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
- new FsIndexDescription[0]);
- for (int i = 0; i < 2; i++) {
- XmiCasDeserializer newDeser = new XmiCasDeserializer(newCas.getTypeSystem());
- ContentHandler newDeserHandler = newDeser.getXmiCasHandler(newCas);
- SAXParserFactory fact = SAXParserFactory.newInstance();
- SAXParser parser = fact.newSAXParser();
- XMLReader xmlReader = parser.getXMLReader();
- xmlReader.setContentHandler(newDeserHandler);
- xmlReader.parse(new InputSource(new StringReader(xml)));
-
- // check sofas
- assertEquals("This is a test", newCas.getDocumentText());
- CAS newCas2 = newCas.getView("OtherSofa");
- assertEquals("This is only a test", newCas2.getDocumentText());
-
- // check that annotation is still indexed in both views
- assertTrue(tIndex.size() == 2); // document annot and this one
- assertTrue(t2Index.size() == 2); // ditto
-
- newCas.reset();
- }
- } catch (Exception e) {
- JUnitExtension.handleException(e);
- }
- }
-
- public void testTypeSystemFiltering() throws Exception {
- try {
- // deserialize a complex CAS from XCAS
- CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
-
- InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
- XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
- ContentHandler deserHandler = deser.getXCASHandler(cas);
- SAXParserFactory fact = SAXParserFactory.newInstance();
- SAXParser parser = fact.newSAXParser();
- XMLReader xmlReader = parser.getXMLReader();
- xmlReader.setContentHandler(deserHandler);
- xmlReader.parse(new InputSource(serCasStream));
- serCasStream.close();
-
- // now read in a TypeSystem that's a subset of those types
- TypeSystemDescription partialTypeSystemDesc = UIMAFramework.getXMLParser()
- .parseTypeSystemDescription(
- new XMLInputSource(JUnitExtension
- .getFile("ExampleCas/partialTestTypeSystem.xml")));
- TypeSystem partialTypeSystem = CasCreationUtils.createCas(partialTypeSystemDesc, null, null)
- .getTypeSystem();
-
- // reserialize as XMI, filtering out anything that doesn't fit in the
- // partialTypeSystem
- StringWriter sw = new StringWriter();
- XMLSerializer xmlSer = new XMLSerializer(sw, false);
- XmiCasSerializer xmiSer = new XmiCasSerializer(partialTypeSystem);
- xmiSer.serialize(cas, xmlSer.getContentHandler());
- String xml = sw.getBuffer().toString();
- // System.out.println(xml);
-
- // deserialize into another CAS (which has the whole type system)
- CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
- XmiCasDeserializer deser2 = new XmiCasDeserializer(cas2.getTypeSystem());
- ContentHandler deserHandler2 = deser2.getXmiCasHandler(cas2);
- xmlReader.setContentHandler(deserHandler2);
- xmlReader.parse(new InputSource(new StringReader(xml)));
-
- // check that types have been filtered out
- Type orgType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Organization");
- assertNotNull(orgType);
- assertTrue(cas2.getAnnotationIndex(orgType).size() == 0);
- assertTrue(cas.getAnnotationIndex(orgType).size() > 0);
-
- // but that some types are still there
- Type personType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person");
- FSIndex personIndex = cas2.getAnnotationIndex(personType);
- assertTrue(personIndex.size() > 0);
-
- // check that mentionType has been filtered out (set to null)
- FeatureStructure somePlace = personIndex.iterator().get();
- Feature mentionTypeFeat = personType.getFeatureByBaseName("mentionType");
- assertNotNull(mentionTypeFeat);
- assertNull(somePlace.getStringValue(mentionTypeFeat));
- } catch (Exception e) {
- JUnitExtension.handleException(e);
- }
- }
-
- public void testNoInitialSofa() throws Exception {
- CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
- new FsIndexDescription[0]);
- // create non-annotation type so as not to create the _InitialView Sofa
- IntArrayFS intArrayFS = cas.createIntArrayFS(5);
- intArrayFS.set(0, 1);
- intArrayFS.set(1, 2);
- intArrayFS.set(2, 3);
- intArrayFS.set(3, 4);
- intArrayFS.set(4, 5);
- cas.getIndexRepository().addFS(intArrayFS);
-
- // serialize the CAS
- StringWriter sw = new StringWriter();
- XMLSerializer xmlSer = new XMLSerializer(sw, false);
- XmiCasSerializer xmiSer = new XmiCasSerializer(cas.getTypeSystem());
- xmiSer.serialize(cas, xmlSer.getContentHandler());
- String xml = sw.getBuffer().toString();
-
- // deserialize into another CAS
- CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
- new FsIndexDescription[0]);
-
- XmiCasDeserializer deser2 = new XmiCasDeserializer(cas2.getTypeSystem());
- ContentHandler deserHandler2 = deser2.getXmiCasHandler(cas2);
- SAXParserFactory fact = SAXParserFactory.newInstance();
- SAXParser parser = fact.newSAXParser();
- XMLReader xmlReader = parser.getXMLReader();
- xmlReader.setContentHandler(deserHandler2);
- xmlReader.parse(new InputSource(new StringReader(xml)));
-
- //test that index is correctly populated
- Type intArrayType = cas2.getTypeSystem().getType(CAS.TYPE_NAME_INTEGER_ARRAY);
- Iterator iter = cas2.getIndexRepository().getAllIndexedFS(intArrayType);
- assertTrue(iter.hasNext());
- IntArrayFS intArrayFS2 = (IntArrayFS)iter.next();
- assertFalse(iter.hasNext());
- assertEquals(5, intArrayFS2.size());
- assertEquals(1, intArrayFS2.get(0));
- assertEquals(2, intArrayFS2.get(1));
- assertEquals(3, intArrayFS2.get(2));
- assertEquals(4, intArrayFS2.get(3));
- assertEquals(5, intArrayFS2.get(4));
-
- // test that serializing the new CAS produces the same XML
- sw = new StringWriter();
- xmlSer = new XMLSerializer(sw, false);
- xmiSer = new XmiCasSerializer(cas2.getTypeSystem());
- xmiSer.serialize(cas2, xmlSer.getContentHandler());
- String xml2 = sw.getBuffer().toString();
- assertTrue(xml2.equals(xml));
- }
-
- public void testv1FormatXcas() throws Exception {
- CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
- new FsIndexDescription[0]);
- CAS v1cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
- new FsIndexDescription[0]);
-
- // get a complex CAS
- InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
- XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
- ContentHandler deserHandler = deser.getXCASHandler(cas);
- SAXParserFactory fact = SAXParserFactory.newInstance();
- SAXParser parser = fact.newSAXParser();
- XMLReader xmlReader = parser.getXMLReader();
- xmlReader.setContentHandler(deserHandler);
- xmlReader.parse(new InputSource(serCasStream));
- serCasStream.close();
-
- // test it
- assertTrue(CAS.NAME_DEFAULT_SOFA.equals(cas.getSofa().getSofaID()));
-
- // get a v1 XMI version of the same CAS
- serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/v1xmiCas.xml"));
- XmiCasDeserializer deser2 = new XmiCasDeserializer(v1cas.getTypeSystem());
- ContentHandler deserHandler2 = deser2.getXmiCasHandler(v1cas);
- xmlReader.setContentHandler(deserHandler2);
- xmlReader.parse(new InputSource(serCasStream));
- serCasStream.close();
-
- // compare
- assertEquals(cas.getAnnotationIndex().size(), v1cas.getAnnotationIndex().size());
- assertTrue(CAS.NAME_DEFAULT_SOFA.equals(v1cas.getSofa().getSofaID()));
-
- // now a v1 XMI version of a multiple Sofa CAS
- v1cas.reset();
- serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/xmiMsCasV1.xml"));
- deser2 = new XmiCasDeserializer(v1cas.getTypeSystem());
- deserHandler2 = deser2.getXmiCasHandler(v1cas);
- xmlReader.setContentHandler(deserHandler2);
- xmlReader.parse(new InputSource(serCasStream));
- serCasStream.close();
-
- // test it
- CAS engView = v1cas.getView("EnglishDocument");
- assertTrue(engView.getDocumentText().equals("this beer is good"));
- assertTrue(engView.getAnnotationIndex().size() == 5); // 4 annots plus documentAnnotation
- CAS gerView = v1cas.getView("GermanDocument");
- assertTrue(gerView.getDocumentText().equals("das bier ist gut"));
- assertTrue(gerView.getAnnotationIndex().size() == 5); // 4 annots plus documentAnnotation
- assertTrue(CAS.NAME_DEFAULT_SOFA.equals(v1cas.getSofa().getSofaID()));
- assertTrue(v1cas.getDocumentText().equals("some text for the default text sofa."));
-
- // reserialize as XMI
- StringWriter sw = new StringWriter();
- XMLSerializer xmlSer = new XMLSerializer(sw, false);
- XmiCasSerializer xmiSer = new XmiCasSerializer(v1cas.getTypeSystem());
- xmiSer.serialize(v1cas, xmlSer.getContentHandler());
- String xml = sw.getBuffer().toString();
-
- cas.reset();
-
- // deserialize into another CAS
- deser2 = new XmiCasDeserializer(cas.getTypeSystem());
- deserHandler2 = deser2.getXmiCasHandler(cas);
- xmlReader.setContentHandler(deserHandler2);
- xmlReader.parse(new InputSource(new StringReader(xml)));
-
- // test it
- engView = cas.getView("EnglishDocument");
- assertTrue(engView.getDocumentText().equals("this beer is good"));
- assertTrue(engView.getAnnotationIndex().size() == 5); // 4 annots plus documentAnnotation
- gerView = cas.getView("GermanDocument");
- assertTrue(gerView.getDocumentText().equals("das bier ist gut"));
- assertTrue(gerView.getAnnotationIndex().size() == 5); // 4 annots plus documentAnnotation
- assertTrue(CAS.NAME_DEFAULT_SOFA.equals(v1cas.getSofa().getSofaID()));
- assertTrue(v1cas.getDocumentText().equals("some text for the default text sofa."));
- }
-
- public void testDuplicateNsPrefixes() throws Exception {
- TypeSystemDescription ts = new TypeSystemDescription_impl();
- ts.addType("org.bar.foo.Foo", "", "uima.tcas.Annotation");
- ts.addType("org.baz.foo.Foo", "", "uima.tcas.Annotation");
- CAS cas = CasCreationUtils.createCas(ts, null, null);
- cas.setDocumentText("Foo");
- Type t1 = cas.getTypeSystem().getType("org.bar.foo.Foo");
- Type t2 = cas.getTypeSystem().getType("org.baz.foo.Foo");
- AnnotationFS a1 = cas.createAnnotation(t1,0,3);
- cas.addFsToIndexes(a1);
- AnnotationFS a2 = cas.createAnnotation(t2,0,3);
- cas.addFsToIndexes(a2);
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- XmiCasSerializer.serialize(cas, baos);
- baos.close();
- byte[] bytes = baos.toByteArray();
-
- CAS cas2 = CasCreationUtils.createCas(ts, null, null);
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- XmiCasDeserializer.deserialize(bais, cas2);
- bais.close();
-
- CasComparer.assertEquals(cas, cas2);
- }
-
- public void testMerging() throws Exception {
- // deserialize a complex CAS from XCAS
- CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
- InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
- XCASDeserializer.deserialize(serCasStream, cas);
- serCasStream.close();
- int numAnnotations = cas.getAnnotationIndex().size(); //for comparison later
- String docText = cas.getDocumentText(); //for comparison later
- //add a new Sofa to test that multiple Sofas in original CAS work
- CAS preexistingView = cas.createView("preexistingView");
- String preexistingViewText = "John Smith blah blah blah";
- preexistingView.setDocumentText(preexistingViewText);
- createPersonAnnot(preexistingView, 0, 10);
-
- // do XMI serialization to a string, using XmiSerializationSharedData
- // to keep track of maximum ID generated
- XmiSerializationSharedData serSharedData = new XmiSerializationSharedData();
- String xmiStr = serialize(cas, serSharedData);
- int maxOutgoingXmiId = serSharedData.getMaxXmiId();
-
- //deserialize into two new CASes, again using XmiSerializationSharedData so
- //we can get consistent IDs later.
- CAS newCas1 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
- XmiSerializationSharedData deserSharedData1 = new XmiSerializationSharedData();
- deserialize(xmiStr, newCas1, deserSharedData1, false, -1);
-
- CAS newCas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
- XmiSerializationSharedData deserSharedData2 = new XmiSerializationSharedData();
- deserialize(xmiStr, newCas2, deserSharedData2, false, -1);
-
- //add new FS to each new CAS
- createPersonAnnot(newCas1, 0, 10);
- createPersonAnnot(newCas1, 20, 30);
- createPersonAnnot(newCas2, 40, 50);
- AnnotationFS person = createPersonAnnot(newCas2, 60, 70);
-
- //add an Owner relation that points to an organization in the original CAS,
- //to test links across merge boundary
- Type orgType = newCas2.getTypeSystem().getType(
- "org.apache.uima.testTypeSystem.Organization");
- AnnotationFS org = (AnnotationFS)newCas2.getAnnotationIndex(orgType).iterator().next();
- Type ownerType = newCas2.getTypeSystem().getType(
- "org.apache.uima.testTypeSystem.Owner");
- Feature argsFeat = ownerType.getFeatureByBaseName("relationArgs");
- Feature componentIdFeat = ownerType.getFeatureByBaseName("componentId");
- Type relArgsType = newCas2.getTypeSystem().getType(
- "org.apache.uima.testTypeSystem.BinaryRelationArgs");
- Feature domainFeat = relArgsType.getFeatureByBaseName("domainValue");
- Feature rangeFeat = relArgsType.getFeatureByBaseName("rangeValue");
- AnnotationFS ownerAnnot = newCas2.createAnnotation(ownerType, 0, 70);
- FeatureStructure relArgs = newCas2.createFS(relArgsType);
- relArgs.setFeatureValue(domainFeat, person);
- relArgs.setFeatureValue(rangeFeat, org);
- ownerAnnot.setFeatureValue(argsFeat, relArgs);
- ownerAnnot.setStringValue(componentIdFeat, "XCasDeserializerTest");
- newCas2.addFsToIndexes(ownerAnnot);
- int orgBegin = org.getBegin();
- int orgEnd = org.getEnd();
-
- //add Sofas
- CAS newView1 = newCas1.createView("newSofa1");
- final String sofaText1 = "This is a new Sofa, created in CAS 1.";
- newView1.setDocumentText(sofaText1);
- final String annotText = "Sofa";
- int annotStart1 = sofaText1.indexOf(annotText);
- AnnotationFS annot1 = newView1.createAnnotation(orgType, annotStart1, annotStart1 + annotText.length());
- newView1.addFsToIndexes(annot1);
- CAS newView2 = newCas2.createView("newSofa2");
- final String sofaText2 = "This is another new Sofa, created in CAS 2.";
- newView2.setDocumentText(sofaText2);
- int annotStart2 = sofaText2.indexOf(annotText);
- AnnotationFS annot2 = newView2.createAnnotation(orgType, annotStart2, annotStart2 + annotText.length());
- newView2.addFsToIndexes(annot2);
-
- //re-serialize each new CAS back to XMI, keeping consistent ids
- String newSerCas1 = serialize(newCas1, deserSharedData1);
- String newSerCas2 = serialize(newCas2, deserSharedData2);
-
- //merge the two XMI CASes back into the original CAS
- XmiSerializationSharedData deserSharedData3 = new XmiSerializationSharedData();
- deserialize(newSerCas1, cas, deserSharedData3, false, -1);
-
- assertEquals(numAnnotations +2, cas.getAnnotationIndex().size());
-
- deserialize(newSerCas2, cas, deserSharedData3, false, maxOutgoingXmiId);
-
- assertEquals(numAnnotations + 5, cas.getAnnotationIndex().size());
-
- assertEquals(docText, cas.getDocumentText());
-
- // Serialize/deserialize again in case merge created duplicate ids
- String newSerCasMerged = serialize(cas, deserSharedData3);
- deserialize(newSerCasMerged, cas, deserSharedData3, false, -1);
-
- //check covered text of annotations
- FSIterator iter = cas.getAnnotationIndex().iterator();
- while (iter.hasNext()) {
- AnnotationFS annot = (AnnotationFS)iter.next();
- assertEquals(cas.getDocumentText().substring(
- annot.getBegin(), annot.getEnd()), annot.getCoveredText());
- }
- //check Owner annotation we created to test link across merge boundary
- iter = cas.getAnnotationIndex(ownerType).iterator();
- while (iter.hasNext()) {
- AnnotationFS annot = (AnnotationFS)iter.next();
- String componentId = annot.getStringValue(componentIdFeat);
- if ("XCasDeserializerTest".equals(componentId)) {
- FeatureStructure targetRelArgs = annot.getFeatureValue(argsFeat);
- AnnotationFS targetDomain = (AnnotationFS)targetRelArgs.getFeatureValue(domainFeat);
- assertEquals(60, targetDomain.getBegin());
- assertEquals(70, targetDomain.getEnd());
- AnnotationFS targetRange = (AnnotationFS)targetRelArgs.getFeatureValue(rangeFeat);
- assertEquals(orgBegin, targetRange.getBegin());
- assertEquals(orgEnd, targetRange.getEnd());
- }
- }
- //check Sofas
- CAS targetView1 = cas.getView("newSofa1");
- assertEquals(sofaText1, targetView1.getDocumentText());
- CAS targetView2 = cas.getView("newSofa2");
- assertEquals(sofaText2, targetView2.getDocumentText());
- AnnotationFS targetAnnot1 = (AnnotationFS)
- targetView1.getAnnotationIndex(orgType).iterator().get();
- assertEquals(annotText, targetAnnot1.getCoveredText());
- AnnotationFS targetAnnot2 = (AnnotationFS)
- targetView2.getAnnotationIndex(orgType).iterator().get();
- assertEquals(annotText, targetAnnot2.getCoveredText());
- assertTrue(targetView1.getSofa().getSofaRef() !=
- targetView2.getSofa().getSofaRef());
-
- CAS checkPreexistingView = cas.getView("preexistingView");
- assertEquals(preexistingViewText, checkPreexistingView.getDocumentText());
- Type personType = cas.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person");
- AnnotationFS targetAnnot3 = (AnnotationFS)
- checkPreexistingView.getAnnotationIndex(personType).iterator().get();
- assertEquals("John Smith", targetAnnot3.getCoveredText());
-
- //try an initial CAS that contains multiple Sofas
-
- }
-
- public void testOutOfTypeSystemData() throws Exception {
- // deserialize a simple XMI into a CAS with no TypeSystem
- CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
- new TypePriorities_impl(), new FsIndexDescription[0]);
- File xmiFile = JUnitExtension.getFile("ExampleCas/simpleCas.xmi");
- String xmiStr = FileUtils.file2String(xmiFile, "UTF-8");
-
- XmiSerializationSharedData sharedData = new XmiSerializationSharedData();
- deserialize(xmiStr, cas, sharedData, true, -1);
-
- //do some checks on the out-of-type system data
- List ootsElems = sharedData.getOutOfTypeSystemElements();
- assertEquals(9, ootsElems.size());
- List ootsViewMembers = sharedData.getOutOfTypeSystemViewMembers("1");
- assertEquals(7, ootsViewMembers.size());
-
- // now reserialize including OutOfTypeSystem data
- String xmiStr2 = serialize(cas, sharedData);
-
- //deserialize both original and new XMI into CASes that do have the full typesystem
- CAS newCas1 = CasCreationUtils.createCas(typeSystem, null, indexes);
- deserialize(xmiStr, newCas1, null, false, -1);
- CAS newCas2 = CasCreationUtils.createCas(typeSystem, null, indexes);
- deserialize(xmiStr2, newCas2, null, false, -1);
- CasComparer.assertEquals(newCas1, newCas2);
-
- //Test a partial type system with a missing some missing features and
- //missing "Organization" type
- File partialTypeSystemFile = JUnitExtension.getFile("ExampleCas/partialTestTypeSystem.xml");
- TypeSystemDescription partialTypeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
- new XMLInputSource(partialTypeSystemFile));
- CAS partialTsCas = CasCreationUtils.createCas(partialTypeSystem, null, indexes);
- XmiSerializationSharedData sharedData2 = new XmiSerializationSharedData();
- deserialize(xmiStr, partialTsCas, sharedData2, true, -1);
-
- assertEquals(1,sharedData2.getOutOfTypeSystemElements().size());
- OotsElementData ootsFeats3 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsAddrForXmiId(3));
- assertEquals(1, ootsFeats3.attributes.size());
- XmlAttribute ootsAttr = (XmlAttribute)ootsFeats3.attributes.get(0);
- assertEquals("mentionType", ootsAttr.name);
- assertEquals("NAME", ootsAttr.value);
- OotsElementData ootsFeats5 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsAddrForXmiId(5));
- assertEquals(0, ootsFeats5.attributes.size());
- assertEquals(1, ootsFeats5.childElements.size());
- XmlElementNameAndContents ootsChildElem = (XmlElementNameAndContents)
- ootsFeats5.childElements.get(0);
- assertEquals("mentionType", ootsChildElem.name.qName);
- assertEquals("NAME", ootsChildElem.contents);
-
- OotsElementData ootsFeats8 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsAddrForXmiId(8));
- assertEquals(1, ootsFeats8.attributes.size());
- OotsElementData ootsFeats10 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsAddrForXmiId(10));
- assertEquals(1, ootsFeats10.attributes.size());
- OotsElementData ootsFeats11 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsAddrForXmiId(11));
- assertEquals(4, ootsFeats11.childElements.size());
-
- String xmiStr3 = serialize(partialTsCas, sharedData2);
- newCas2.reset();
- deserialize(xmiStr3, newCas2, null, false, -1);
- CasComparer.assertEquals(newCas1, newCas2);
- }
-
- public void testOutOfTypeSystemArrayElement() throws Exception {
- //add to type system an annotation type that has an FSArray feature
- TypeDescription testAnnotTypeDesc = typeSystem.addType("org.apache.uima.testTypeSystem.TestAnnotation", "", "uima.tcas.Annotation");
- testAnnotTypeDesc.addFeature("arrayFeat", "", "uima.cas.FSArray");
- //populate a CAS with such an array
- CAS cas = CasCreationUtils.createCas(typeSystem, null, null);
- Type testAnnotType = cas.getTypeSystem().getType("org.apache.uima.testTypeSystem.TestAnnotation");
- Type orgType = cas.getTypeSystem().getType(
- "org.apache.uima.testTypeSystem.Organization");
- AnnotationFS orgAnnot1 = cas.createAnnotation(orgType, 0, 10);
- cas.addFsToIndexes(orgAnnot1);
- AnnotationFS orgAnnot2 = cas.createAnnotation(orgType, 10, 20);
- cas.addFsToIndexes(orgAnnot2);
- AnnotationFS testAnnot = cas.createAnnotation(testAnnotType, 0, 20);
- cas.addFsToIndexes(testAnnot);
- ArrayFS arrayFs = cas.createArrayFS(2);
- arrayFs.set(0, orgAnnot1);
- arrayFs.set(1, orgAnnot2);
- Feature arrayFeat = testAnnotType.getFeatureByBaseName("arrayFeat");
- testAnnot.setFeatureValue(arrayFeat, arrayFs);
-
- //serialize to XMI
- String xmiStr = serialize(cas, null);
-
- //deserialize into a CAS that's missing the Organization type
- File partialTypeSystemFile = JUnitExtension.getFile("ExampleCas/partialTestTypeSystem.xml");
- TypeSystemDescription partialTypeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
- new XMLInputSource(partialTypeSystemFile));
- testAnnotTypeDesc = partialTypeSystem.addType("org.apache.uima.testTypeSystem.TestAnnotation", "", "uima.tcas.Annotation");
- testAnnotTypeDesc.addFeature("arrayFeat", "", "uima.cas.FSArray");
- CAS partialTsCas = CasCreationUtils.createCas(partialTypeSystem, null, null);
- XmiSerializationSharedData sharedData = new XmiSerializationSharedData();
- deserialize(xmiStr, partialTsCas, sharedData, true, -1);
-
- //check out of type system data
- Type testAnnotType2 = partialTsCas.getTypeSystem().getType("org.apache.uima.testTypeSystem.TestAnnotation");
- FeatureStructure testAnnot2 = partialTsCas.getAnnotationIndex(testAnnotType2).iterator().get();
- Feature arrayFeat2 = testAnnotType2.getFeatureByBaseName("arrayFeat");
- FeatureStructure arrayFs2 = testAnnot2.getFeatureValue(arrayFeat2);
- List ootsElems = sharedData.getOutOfTypeSystemElements();
- assertEquals(2, ootsElems.size());
- List ootsArrayElems = sharedData.getOutOfTypeSystemArrayElements(arrayFs2.hashCode());
- assertEquals(2, ootsArrayElems.size());
- for (int i = 0; i < 2; i++) {
- OotsElementData oed = (OotsElementData)ootsElems.get(i);
- XmiArrayElement arel = (XmiArrayElement)ootsArrayElems.get(i);
- assertEquals(oed.xmiId, arel.xmiId);
- }
-
- //reserialize along with out of type system data
- String xmiStr2 = serialize(partialTsCas, sharedData);
-
- //deserialize into a new CAS and compare
- CAS cas2 = CasCreationUtils.createCas(typeSystem, null, null);
- deserialize(xmiStr2, cas2, null, false, -1);
-
- CasComparer.assertEquals(cas, cas2);
- }
-
- public void testOutOfTypeSystemListElement() throws Exception {
- //add to type system an annotation type that has an FSList feature
- TypeDescription testAnnotTypeDesc = typeSystem.addType("org.apache.uima.testTypeSystem.TestAnnotation", "", "uima.tcas.Annotation");
- testAnnotTypeDesc.addFeature("listFeat", "", "uima.cas.FSList");
- //populate a CAS with such an list
- CAS cas = CasCreationUtils.createCas(typeSystem, null, null);
- Type testAnnotType = cas.getTypeSystem().getType("org.apache.uima.testTypeSystem.TestAnnotation");
- Type orgType = cas.getTypeSystem().getType(
- "org.apache.uima.testTypeSystem.Organization");
- AnnotationFS orgAnnot1 = cas.createAnnotation(orgType, 0, 10);
- cas.addFsToIndexes(orgAnnot1);
- AnnotationFS orgAnnot2 = cas.createAnnotation(orgType, 10, 20);
- cas.addFsToIndexes(orgAnnot2);
- AnnotationFS testAnnot = cas.createAnnotation(testAnnotType, 0, 20);
- cas.addFsToIndexes(testAnnot);
- Type nonEmptyFsListType = cas.getTypeSystem().getType(CAS.TYPE_NAME_NON_EMPTY_FS_LIST);
- Type emptyFsListType = cas.getTypeSystem().getType(CAS.TYPE_NAME_EMPTY_FS_LIST);
- Feature headFeat = nonEmptyFsListType.getFeatureByBaseName("head");
- Feature tailFeat = nonEmptyFsListType.getFeatureByBaseName("tail");
- FeatureStructure emptyNode = cas.createFS(emptyFsListType);
- FeatureStructure secondNode = cas.createFS(nonEmptyFsListType);
- secondNode.setFeatureValue(headFeat, orgAnnot2);
- secondNode.setFeatureValue(tailFeat, emptyNode);
- FeatureStructure firstNode = cas.createFS(nonEmptyFsListType);
- firstNode.setFeatureValue(headFeat, orgAnnot1);
- firstNode.setFeatureValue(tailFeat, secondNode);
-
- Feature listFeat = testAnnotType.getFeatureByBaseName("listFeat");
- testAnnot.setFeatureValue(listFeat, firstNode);
-
- //serialize to XMI
- String xmiStr = serialize(cas, null);
-// System.out.println(xmiStr);
-
- //deserialize into a CAS that's missing the Organization type
- File partialTypeSystemFile = JUnitExtension.getFile("ExampleCas/partialTestTypeSystem.xml");
- TypeSystemDescription partialTypeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
- new XMLInputSource(partialTypeSystemFile));
- testAnnotTypeDesc = partialTypeSystem.addType("org.apache.uima.testTypeSystem.TestAnnotation", "", "uima.tcas.Annotation");
- testAnnotTypeDesc.addFeature("listFeat", "", "uima.cas.FSList");
- CAS partialTsCas = CasCreationUtils.createCas(partialTypeSystem, null, null);
- XmiSerializationSharedData sharedData = new XmiSerializationSharedData();
- deserialize(xmiStr, partialTsCas, sharedData, true, -1);
-
- //check out of type system data
- Type testAnnotType2 = partialTsCas.getTypeSystem().getType("org.apache.uima.testTypeSystem.TestAnnotation");
- FeatureStructure testAnnot2 = partialTsCas.getAnnotationIndex(testAnnotType2).iterator().get();
- Feature listFeat2 = testAnnotType2.getFeatureByBaseName("listFeat");
- FeatureStructure listFs = testAnnot2.getFeatureValue(listFeat2);
- List ootsElems = sharedData.getOutOfTypeSystemElements();
- assertEquals(2, ootsElems.size());
- OotsElementData oed = sharedData.getOutOfTypeSystemFeatures(listFs.hashCode());
- XmlAttribute attr = (XmlAttribute)oed.attributes.get(0);
- assertNotNull(attr);
- assertEquals(CAS.FEATURE_BASE_NAME_HEAD, attr.name);
- assertEquals(attr.value, ((OotsElementData)ootsElems.get(0)).xmiId);
-
- //reserialize along with out of type system data
- String xmiStr2 = serialize(partialTsCas, sharedData);
-// System.out.println(xmiStr2);
-
- //deserialize into a new CAS and compare
- CAS cas2 = CasCreationUtils.createCas(typeSystem, null, null);
- deserialize(xmiStr2, cas2, null, false, -1);
-
- CasComparer.assertEquals(cas, cas2);
- }
-
- public void testOutOfTypeSystemDataComplexCas() throws Exception {
- // deserialize a complex XCAS
- CAS originalCas = CasCreationUtils.createCas(typeSystem, null, indexes);
- InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
- XCASDeserializer.deserialize(serCasStream, originalCas);
- serCasStream.close();
-
- //serialize to XMI
- String xmiStr = serialize(originalCas, null);
-
- //deserialize into a CAS with no type system
- CAS casWithNoTs = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
- new TypePriorities_impl(), new FsIndexDescription[0]);
- XmiSerializationSharedData sharedData = new XmiSerializationSharedData();
- deserialize(xmiStr, casWithNoTs, sharedData, true, -1);
-
- // now reserialize including OutOfTypeSystem data
- String xmiStr2 = serialize(casWithNoTs, sharedData);
-
- //deserialize into a new CAS that has the full type system
- CAS newCas = CasCreationUtils.createCas(typeSystem, null, indexes);
- deserialize(xmiStr2, newCas, null, false, -1);
-
- //compare
- CasComparer.assertEquals(originalCas, newCas);
-
- //Test a partial type system with a missing some missing features and
- //missing "Organization" type
- File partialTypeSystemFile = JUnitExtension.getFile("ExampleCas/partialTestTypeSystem.xml");
- TypeSystemDescription partialTypeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
- new XMLInputSource(partialTypeSystemFile));
- CAS partialTsCas = CasCreationUtils.createCas(partialTypeSystem, null, indexes);
- XmiSerializationSharedData sharedData2 = new XmiSerializationSharedData();
- deserialize(xmiStr, partialTsCas, sharedData2, true, -1);
-
- String xmiStr3 = serialize(partialTsCas, sharedData2);
- newCas.reset();
- deserialize(xmiStr3, newCas, null, false, -1);
- CasComparer.assertEquals(originalCas, newCas);
- }
-
- public void testGetNumChildren() throws Exception {
- // deserialize a complex XCAS
- CAS cas = CasCreationUtils.createCas(typeSystem, null, indexes);
-// InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
-// XCASDeserializer.deserialize(serCasStream, cas);
-// serCasStream.close();
- InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/simpleCas.xmi"));
- XmiCasDeserializer.deserialize(serCasStream, cas);
- serCasStream.close();
-
- // call serializer with a ContentHandler that checks numChildren
- XmiCasSerializer xmiSer = new XmiCasSerializer(cas.getTypeSystem());
- GetNumChildrenTestHandler handler = new GetNumChildrenTestHandler(xmiSer);
- xmiSer.serialize(cas, handler);
- }
-
- /** Utility method for serializing a CAS to an XMI String
- * */
- private static String serialize(CAS cas, XmiSerializationSharedData serSharedData) throws IOException, SAXException {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- XmiCasSerializer.serialize(cas, null, baos, false, serSharedData);
- baos.close();
- String xmiStr = new String(baos.toByteArray(), "UTF-8"); //note by default XmiCasSerializer generates UTF-8
-
- //workaround for newline serialization problem in Sun Java 1.4.2
- //this test file should contain CRLF line endings, but Sun Java loses them
- //when it serializes XML.
- if(!builtInXmlSerializationSupportsCRs()) {
- xmiStr = xmiStr.replaceAll("&#10;", "&#13;&#10;");
- }
- return xmiStr;
- }
-
-
- /** Utility method for deserializing a CAS from an XMI String */
- private void deserialize(String xmlStr, CAS cas, XmiSerializationSharedData sharedData, boolean lenient, int mergePoint) throws FactoryConfigurationError, ParserConfigurationException, SAXException, IOException {
- byte[] bytes = xmlStr.getBytes("UTF-8"); //this assumes the encoding is UTF-8, which is the default output encoding of the XmiCasSerializer
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- XmiCasDeserializer.deserialize(bais, cas, lenient, sharedData, mergePoint);
- bais.close();
- }
-
- private AnnotationFS createPersonAnnot(CAS cas, int begin, int end) {
- Type personType = cas.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person");
- AnnotationFS person = cas.createAnnotation(personType, begin, end);
- cas.addFsToIndexes(person);
- return person;
- }
-
- /**
- * Checks the Java vendor and version and returns true if running a version
- * of Java whose built-in XSLT support can properly serialize carriage return
- * characters, and false if not. It seems to be the case that Sun JVMs prior
- * to 1.5 do not properly serialize carriage return characters. We have to
- * modify our test case to account for this.
- * @return true if XML serialization of CRs behave properly in the current JRE
- */
- private static boolean builtInXmlSerializationSupportsCRs() {
- String javaVendor = System.getProperty("java.vendor");
- if( javaVendor.startsWith("Sun") ) {
- String javaVersion = System.getProperty("java.version");
- if( javaVersion.startsWith("1.3") || javaVersion.startsWith("1.4") )
- return false;
- }
- return true;
- }
-
- static class GetNumChildrenTestHandler extends DefaultHandler {
- XmiCasSerializer xmiSer;
- Stack childCountStack = new Stack();
-
- GetNumChildrenTestHandler(XmiCasSerializer xmiSer) {
- this.xmiSer = xmiSer;
- childCountStack.push(Integer.valueOf(1));
- }
-
- /* (non-Javadoc)
- * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
- */
- public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
- // TODO Auto-generated method stub
- super.startElement(uri, localName, qName, attributes);
- childCountStack.push(Integer.valueOf(xmiSer.getNumChildren()));
- }
-
- /* (non-Javadoc)
- * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
- */
- public void endElement(String uri, String localName, String qName) throws SAXException {
- // TODO Auto-generated method stub
- super.endElement(uri, localName, qName);
- //check that we've seen the expected number of child elements
- //(count on top of stack should be 0)
- Integer count = (Integer)childCountStack.pop();
- assertEquals(0, count.intValue());
-
- //decremenet child count of our parent
- count = (Integer)childCountStack.pop();
- childCountStack.push(Integer.valueOf(count.intValue() - 1));
- }
-
- /* (non-Javadoc)
- * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
- */
- public void characters(char[] ch, int start, int length) throws SAXException {
- // text node is considered a child
- if (length > 0) {
- Integer count = (Integer)childCountStack.pop();
- childCountStack.push(Integer.valueOf(count.intValue() - 1));
- }
- }
-
-
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.cas.impl;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Stack;
+
+import javax.xml.parsers.FactoryConfigurationError;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import junit.framework.TestCase;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.cas.ArrayFS;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASRuntimeException;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.IntArrayFS;
+import org.apache.uima.cas.Marker;
+import org.apache.uima.cas.StringArrayFS;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
+import org.apache.uima.cas.impl.XmiSerializationSharedData.XmiArrayElement;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.cas_data.impl.CasComparer;
+import org.apache.uima.internal.util.XmlAttribute;
+import org.apache.uima.internal.util.XmlElementNameAndContents;
+import org.apache.uima.resource.metadata.FsIndexDescription;
+import org.apache.uima.resource.metadata.TypeDescription;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.resource.metadata.impl.TypePriorities_impl;
+import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl;
+import org.apache.uima.test.junit_extension.JUnitExtension;
+import org.apache.uima.util.CasCopier;
+import org.apache.uima.util.CasCreationUtils;
+import org.apache.uima.util.FileUtils;
+import org.apache.uima.util.XMLInputSource;
+import org.apache.uima.util.XMLSerializer;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
+
+
+public class XmiCasDeserializerTest extends TestCase {
+
+ private FsIndexDescription[] indexes; // assigned in setUp() from ExampleCas/testIndexes.xml
+
+ private TypeSystemDescription typeSystem; // assigned in setUp() from ExampleCas/testTypeSystem.xml
+
+ /**
+ * Constructor for XmiCasDeserializerTest.
+ *
+ * @param arg0 passed through unchanged to the TestCase superclass constructor
+ * (presumably the JUnit test name -- confirm against junit.framework.TestCase)
+ */
+ public XmiCasDeserializerTest(String arg0) throws IOException {
+ super(arg0);
+ }
+
+ /**
+  * Loads the fixtures shared by the tests: the type system description from
+  * ExampleCas/testTypeSystem.xml and the index descriptions from
+  * ExampleCas/testIndexes.xml.
+  */
+ protected void setUp() throws Exception {
+   File tsDescriptor = JUnitExtension.getFile("ExampleCas/testTypeSystem.xml");
+   File indexDescriptor = JUnitExtension.getFile("ExampleCas/testIndexes.xml");
+
+   XMLInputSource tsSource = new XMLInputSource(tsDescriptor);
+   typeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(tsSource);
+
+   XMLInputSource indexSource = new XMLInputSource(indexDescriptor);
+   indexes = UIMAFramework.getXMLParser().parseFsIndexCollection(indexSource).getFsIndexes();
+ }
+
+ /**
+  * Runs doTestDeserializeAndReserialize against both test type systems, first
+  * without and then with the JCas initialized.
+  */
+ public void testDeserializeAndReserialize() throws Exception {
+   try {
+     File plainTs = JUnitExtension.getFile("ExampleCas/testTypeSystem.xml");
+     doTestDeserializeAndReserialize(plainTs, false);
+     File multiRefTs = JUnitExtension.getFile("ExampleCas/testTypeSystem_withMultiRefs.xml");
+     doTestDeserializeAndReserialize(multiRefTs, false);
+
+     // repeat both runs, in the same order, with the JCas initialized
+     File[] descriptors = new File[] { plainTs, multiRefTs };
+     for (int i = 0; i < descriptors.length; i++) {
+       doTestDeserializeAndReserialize(descriptors[i], true);
+     }
+   } catch (Exception ex) {
+     JUnitExtension.handleException(ex);
+   }
+ }
+
+ /**
+  * Reads ExampleCas/cas.xml (XCAS) into a CAS built from the given type system,
+  * reserializes it as XMI, deserializes that XMI into a second CAS and checks that
+  * the two CASes compare equal and that array references survived. Finally parses
+  * the same XMI leniently into a CAS with an empty type system.
+  *
+  * @param typeSystemDescriptorFile type system descriptor used for the first two CASes
+  * @param useJCas if true, getJCas() is called on each CAS before it is used
+  * @throws Exception if parsing, CAS creation or serialization fails
+  */
+ private void doTestDeserializeAndReserialize(File typeSystemDescriptorFile, boolean useJCas) throws Exception {
+   // deserialize a complex CAS from XCAS
+   TypeSystemDescription typeSystemDescription = UIMAFramework.getXMLParser().parseTypeSystemDescription(
+       new XMLInputSource(typeSystemDescriptorFile));
+   CAS cas = CasCreationUtils.createCas(typeSystemDescription, new TypePriorities_impl(), indexes);
+   if (useJCas) {
+     cas.getJCas();
+   }
+
+   XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
+   ContentHandler deserHandler = deser.getXCASHandler(cas);
+   SAXParserFactory fact = SAXParserFactory.newInstance();
+   SAXParser parser = fact.newSAXParser();
+   XMLReader xmlReader = parser.getXMLReader();
+   xmlReader.setContentHandler(deserHandler);
+   InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
+   try {
+     xmlReader.parse(new InputSource(serCasStream));
+   } finally {
+     // release the fixture file even when parsing fails
+     serCasStream.close();
+   }
+
+   // reserialize as XMI
+   String xml = serialize(cas, null);
+
+   // deserialize into another CAS
+   CAS cas2 = CasCreationUtils.createCas(typeSystemDescription, new TypePriorities_impl(), indexes);
+   if (useJCas) {
+     cas2.getJCas();
+   }
+   XmiCasDeserializer deser2 = new XmiCasDeserializer(cas2.getTypeSystem());
+   ContentHandler deserHandler2 = deser2.getXmiCasHandler(cas2);
+   xmlReader.setContentHandler(deserHandler2);
+   xmlReader.parse(new InputSource(new StringReader(xml)));
+
+   // compare
+   assertEquals(cas.getAnnotationIndex().size(), cas2.getAnnotationIndex().size());
+   assertEquals(cas.getDocumentText(), cas2.getDocumentText());
+   CasComparer.assertEquals(cas, cas2);
+
+   // check that array refs are not null
+   Type entityType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Entity");
+   Feature classesFeat = entityType.getFeatureByBaseName("classes");
+   Iterator iter = cas2.getIndexRepository().getIndex("testEntityIndex").iterator();
+   assertTrue(iter.hasNext());
+   while (iter.hasNext()) {
+     FeatureStructure fs = (FeatureStructure) iter.next();
+     StringArrayFS arrayFS = (StringArrayFS) fs.getFeatureValue(classesFeat);
+     assertNotNull(arrayFS);
+     for (int i = 0; i < arrayFS.size(); i++) {
+       assertNotNull(arrayFS.get(i));
+     }
+   }
+   Type annotArrayTestType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.AnnotationArrayTest");
+   Feature annotArrayFeat = annotArrayTestType.getFeatureByBaseName("arrayOfAnnotations");
+   Iterator iter2 = cas2.getAnnotationIndex(annotArrayTestType).iterator();
+   assertTrue(iter2.hasNext());
+   while (iter2.hasNext()) {
+     FeatureStructure fs = (FeatureStructure) iter2.next();
+     ArrayFS arrayFS = (ArrayFS) fs.getFeatureValue(annotArrayFeat);
+     assertNotNull(arrayFS);
+     for (int i = 0; i < arrayFS.size(); i++) {
+       assertNotNull(arrayFS.get(i));
+     }
+   }
+
+   // test that lenient mode does not report errors
+   CAS cas3 = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
+       new TypePriorities_impl(), new FsIndexDescription[0]);
+   if (useJCas) {
+     cas3.getJCas();
+   }
+   XmiCasDeserializer deser3 = new XmiCasDeserializer(cas3.getTypeSystem());
+   ContentHandler deserHandler3 = deser3.getXmiCasHandler(cas3, true);
+   xmlReader.setContentHandler(deserHandler3);
+   xmlReader.parse(new InputSource(new StringReader(xml)));
+ }
+
+ /**
+  * Runs doTestMultiThreadedSerialize once per test type system descriptor.
+  */
+ public void testMultiThreadedSerialize() throws Exception {
+   final String[] descriptorPaths = new String[] {
+       "ExampleCas/testTypeSystem.xml",
+       "ExampleCas/testTypeSystem_withMultiRefs.xml" };
+   try {
+     for (int i = 0; i < descriptorPaths.length; i++) {
+       File descriptor = JUnitExtension.getFile(descriptorPaths[i]);
+       doTestMultiThreadedSerialize(descriptor);
+     }
+   } catch (Exception ex) {
+     JUnitExtension.handleException(ex);
+   }
+ }
+
+ /**
+  * Worker that serializes one CAS to XMI. Because Runnable.run() cannot throw a
+  * checked exception, any failure is kept so the spawning test can rethrow it
+  * after joining the thread.
+  */
+ private static class DoSerialize implements Runnable {
+   private final CAS cas;
+
+   /** Failure raised inside run(), or null if serialization completed. */
+   private Exception failure;
+
+   DoSerialize(CAS aCas) {
+     cas = aCas;
+   }
+
+   public void run() {
+     try {
+       serialize(cas, null);
+     } catch (Exception e) {
+       // do not swallow: the test thread reads this after Thread.join()
+       failure = e;
+     }
+   }
+
+   /** @return the exception thrown during run(), or null if there was none */
+   Exception getFailure() {
+     return failure;
+   }
+ }
+
+ private static int MAX_THREADS = 16;
+ // do as sequence 1, 2, 4, 8, 16 and measure elapsed time
+ private static int [] threadsToUse = new int[] {1, 2, 4, 8, 16/*, 32, 64*/};
+
+ /**
+  * Reads ExampleCas/cas.xml into a CAS built from the given type system descriptor,
+  * copies it into MAX_THREADS CASes that share one type system, then serializes the
+  * copies to XMI concurrently with 1, 2, 4, ... threads, printing the elapsed time
+  * per thread count. A failure in any worker thread fails the test.
+  *
+  * @param typeSystemDescriptor type system descriptor the source CAS is created from
+  * @throws Exception if setup fails or any worker thread failed to serialize
+  */
+ private void doTestMultiThreadedSerialize(File typeSystemDescriptor) throws Exception {
+   // deserialize a complex CAS from XCAS, using the descriptor that was passed in
+   TypeSystemDescription typeSystemDescription = UIMAFramework.getXMLParser().parseTypeSystemDescription(
+       new XMLInputSource(typeSystemDescriptor));
+   CAS cas = CasCreationUtils.createCas(typeSystemDescription, new TypePriorities_impl(), indexes);
+
+   XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
+   ContentHandler deserHandler = deser.getXCASHandler(cas);
+   SAXParserFactory fact = SAXParserFactory.newInstance();
+   SAXParser parser = fact.newSAXParser();
+   XMLReader xmlReader = parser.getXMLReader();
+   xmlReader.setContentHandler(deserHandler);
+   InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
+   try {
+     xmlReader.parse(new InputSource(serCasStream));
+   } finally {
+     // release the fixture file even when parsing fails
+     serCasStream.close();
+   }
+
+   // make n copies of the cas, so they all share
+   // the same type system
+   final CAS [] cases = new CAS[MAX_THREADS];
+   for (int i = 0; i < MAX_THREADS; i++) {
+     cases[i] = CasCreationUtils.createCas(cas.getTypeSystem(), new TypePriorities_impl(), indexes, null);
+     CasCopier.copyCas(cas, cases[i], true);
+   }
+
+   // start n threads, serializing as XMI
+   for (int i = 0; i < threadsToUse.length; i++) {
+     int numThreads = threadsToUse[i];
+     DoSerialize [] workers = new DoSerialize[numThreads];
+     Thread [] threads = new Thread[numThreads];
+     long startTime = System.currentTimeMillis();
+     for (int ti = 0; ti < numThreads; ti++) {
+       workers[ti] = new DoSerialize(cases[ti]);
+       threads[ti] = new Thread(workers[ti]);
+       threads[ti].start();
+     }
+     for (int ti = 0; ti < numThreads; ti++) {
+       threads[ti].join();
+     }
+     System.out.println("\nNumber of threads serializing: " + threadsToUse[i] +
+         " Normalized millisecs (should be close to the same): " + (System.currentTimeMillis() - startTime) / threadsToUse[i]);
+
+     // surface the first worker failure instead of letting the test pass silently
+     for (int ti = 0; ti < numThreads; ti++) {
+       if (workers[ti].getFailure() != null) {
+         throw workers[ti].getFailure();
+       }
+     }
+   }
+ }
+
+
+ /**
+  * Creates a CAS with two views that both index the same annotation, serializes
+  * it to XMI and deserializes it twice into another CAS (resetting in between),
+  * checking the document text and the annotation index size of both views of the
+  * deserialized CAS each time.
+  */
+ public void testMultipleSofas() throws Exception {
+   try {
+     CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
+         new FsIndexDescription[0]);
+     // set document text for the initial view
+     cas.setDocumentText("This is a test");
+     // create a new view and set its document text
+     CAS cas2 = cas.createView("OtherSofa");
+     cas2.setDocumentText("This is only a test");
+
+     // create an annotation and add to index of both views
+     AnnotationFS anAnnot = cas.createAnnotation(cas.getAnnotationType(), 0, 5);
+     cas.getIndexRepository().addFS(anAnnot);
+     cas2.getIndexRepository().addFS(anAnnot);
+     FSIndex tIndex = cas.getAnnotationIndex();
+     FSIndex t2Index = cas2.getAnnotationIndex();
+     assertEquals(2, tIndex.size()); // document annot and this one
+     assertEquals(2, t2Index.size()); // ditto
+
+     // serialize
+     StringWriter sw = new StringWriter();
+     XMLSerializer xmlSer = new XMLSerializer(sw, false);
+     XmiCasSerializer xmiSer = new XmiCasSerializer(cas.getTypeSystem());
+     xmiSer.serialize(cas, xmlSer.getContentHandler());
+     String xml = sw.getBuffer().toString();
+
+     // deserialize into another CAS (repeat twice to check it still works after reset)
+     CAS newCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
+         new FsIndexDescription[0]);
+     for (int i = 0; i < 2; i++) {
+       XmiCasDeserializer newDeser = new XmiCasDeserializer(newCas.getTypeSystem());
+       ContentHandler newDeserHandler = newDeser.getXmiCasHandler(newCas);
+       SAXParserFactory fact = SAXParserFactory.newInstance();
+       SAXParser parser = fact.newSAXParser();
+       XMLReader xmlReader = parser.getXMLReader();
+       xmlReader.setContentHandler(newDeserHandler);
+       xmlReader.parse(new InputSource(new StringReader(xml)));
+
+       // check sofas
+       assertEquals("This is a test", newCas.getDocumentText());
+       CAS newCas2 = newCas.getView("OtherSofa");
+       assertEquals("This is only a test", newCas2.getDocumentText());
+
+       // check that annotation is still indexed in both views of the
+       // deserialized CAS (the source CAS indexes were already checked above)
+       assertEquals(2, newCas.getAnnotationIndex().size()); // document annot and this one
+       assertEquals(2, newCas2.getAnnotationIndex().size()); // ditto
+
+       newCas.reset();
+     }
+   } catch (Exception e) {
+     JUnitExtension.handleException(e);
+   }
+ }
+
+ /**
+  * Serializes a CAS to XMI through a serializer built on a partial type system,
+  * deserializes the result into a CAS with the full type system, and checks that
+  * Organization annotations and the Person mentionType value are absent while
+  * Person annotations are still present.
+  */
+ public void testTypeSystemFiltering() throws Exception {
+   try {
+     // deserialize a complex CAS from XCAS
+     CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
+
+     XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
+     ContentHandler deserHandler = deser.getXCASHandler(cas);
+     SAXParserFactory fact = SAXParserFactory.newInstance();
+     SAXParser parser = fact.newSAXParser();
+     XMLReader xmlReader = parser.getXMLReader();
+     xmlReader.setContentHandler(deserHandler);
+     InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
+     try {
+       xmlReader.parse(new InputSource(serCasStream));
+     } finally {
+       // release the fixture file even when parsing fails
+       serCasStream.close();
+     }
+
+     // now read in a TypeSystem that's a subset of those types
+     TypeSystemDescription partialTypeSystemDesc = UIMAFramework.getXMLParser()
+         .parseTypeSystemDescription(
+             new XMLInputSource(JUnitExtension
+                 .getFile("ExampleCas/partialTestTypeSystem.xml")));
+     TypeSystem partialTypeSystem = CasCreationUtils.createCas(partialTypeSystemDesc, null, null)
+         .getTypeSystem();
+
+     // reserialize as XMI, filtering out anything that doesn't fit in the
+     // partialTypeSystem
+     StringWriter sw = new StringWriter();
+     XMLSerializer xmlSer = new XMLSerializer(sw, false);
+     XmiCasSerializer xmiSer = new XmiCasSerializer(partialTypeSystem);
+     xmiSer.serialize(cas, xmlSer.getContentHandler());
+     String xml = sw.getBuffer().toString();
+
+     // deserialize into another CAS (which has the whole type system)
+     CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
+     XmiCasDeserializer deser2 = new XmiCasDeserializer(cas2.getTypeSystem());
+     ContentHandler deserHandler2 = deser2.getXmiCasHandler(cas2);
+     xmlReader.setContentHandler(deserHandler2);
+     xmlReader.parse(new InputSource(new StringReader(xml)));
+
+     // check that types have been filtered out
+     Type orgType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Organization");
+     assertNotNull(orgType);
+     assertEquals(0, cas2.getAnnotationIndex(orgType).size());
+     assertTrue(cas.getAnnotationIndex(orgType).size() > 0);
+
+     // but that some types are still there
+     Type personType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person");
+     FSIndex personIndex = cas2.getAnnotationIndex(personType);
+     assertTrue(personIndex.size() > 0);
+
+     // check that mentionType has been filtered out (set to null)
+     FeatureStructure somePerson = personIndex.iterator().get();
+     Feature mentionTypeFeat = personType.getFeatureByBaseName("mentionType");
+     assertNotNull(mentionTypeFeat);
+     assertNull(somePerson.getStringValue(mentionTypeFeat));
+   } catch (Exception e) {
+     JUnitExtension.handleException(e);
+   }
+ }
+
+ /**
+  * Round-trips a CAS whose only indexed feature structure is an IntArrayFS
+  * through XMI, then checks the array contents in the new CAS and that
+  * reserializing the new CAS yields the same XML string.
+  */
+ public void testNoInitialSofa() throws Exception {
+   final int arrayLength = 5;
+   CAS sourceCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
+       new FsIndexDescription[0]);
+   // create non-annotation type so as not to create the _InitialView Sofa
+   IntArrayFS sourceArray = sourceCas.createIntArrayFS(arrayLength);
+   for (int i = 0; i < arrayLength; i++) {
+     sourceArray.set(i, i + 1);
+   }
+   sourceCas.getIndexRepository().addFS(sourceArray);
+
+   // serialize the CAS
+   StringWriter firstOut = new StringWriter();
+   XMLSerializer firstXmlSer = new XMLSerializer(firstOut, false);
+   XmiCasSerializer firstXmiSer = new XmiCasSerializer(sourceCas.getTypeSystem());
+   firstXmiSer.serialize(sourceCas, firstXmlSer.getContentHandler());
+   String xml = firstOut.toString();
+
+   // deserialize into another CAS
+   CAS targetCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
+       new FsIndexDescription[0]);
+   XmiCasDeserializer xmiDeser = new XmiCasDeserializer(targetCas.getTypeSystem());
+   ContentHandler xmiHandler = xmiDeser.getXmiCasHandler(targetCas);
+   XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
+   reader.setContentHandler(xmiHandler);
+   reader.parse(new InputSource(new StringReader(xml)));
+
+   // test that index is correctly populated: exactly one int array, values 1..5
+   Type intArrayType = targetCas.getTypeSystem().getType(CAS.TYPE_NAME_INTEGER_ARRAY);
+   Iterator indexed = targetCas.getIndexRepository().getAllIndexedFS(intArrayType);
+   assertTrue(indexed.hasNext());
+   IntArrayFS targetArray = (IntArrayFS) indexed.next();
+   assertFalse(indexed.hasNext());
+   assertEquals(arrayLength, targetArray.size());
+   for (int i = 0; i < arrayLength; i++) {
+     assertEquals(i + 1, targetArray.get(i));
+   }
+
+   // test that serializing the new CAS produces the same XML
+   StringWriter secondOut = new StringWriter();
+   XMLSerializer secondXmlSer = new XMLSerializer(secondOut, false);
+   XmiCasSerializer secondXmiSer = new XmiCasSerializer(targetCas.getTypeSystem());
+   secondXmiSer.serialize(targetCas, secondXmlSer.getContentHandler());
+   String xml2 = secondOut.toString();
+   assertTrue(xml2.equals(xml));
+ }
+
+ /**
+  * Reads XMI files written in the v1 format and checks what ends up in the CAS.
+  * <p>
+  * Steps:
+  * <ol>
+  * <li>load <code>ExampleCas/cas.xml</code> through the XCAS deserializer as a baseline;</li>
+  * <li>load <code>ExampleCas/v1xmiCas.xml</code> through the XMI deserializer and compare its
+  * annotation index size with the baseline;</li>
+  * <li>load the multi-Sofa file <code>ExampleCas/xmiMsCasV1.xml</code> and check its views;</li>
+  * <li>serialize that CAS as XMI, read the result into the other (reset) CAS and repeat the
+  * same checks on the round-tripped CAS.</li>
+  * </ol>
+  *
+  * @throws Exception
+  *           if reading a test file, parsing or serializing fails
+  */
+ public void testv1FormatXcas() throws Exception {
+   CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
+       new FsIndexDescription[0]);
+   CAS v1cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
+       new FsIndexDescription[0]);
+
+   // a single XMLReader serves every parse below; only its content handler is swapped
+   SAXParserFactory fact = SAXParserFactory.newInstance();
+   SAXParser parser = fact.newSAXParser();
+   XMLReader xmlReader = parser.getXMLReader();
+
+   // get a complex CAS
+   XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
+   ContentHandler deserHandler = deser.getXCASHandler(cas);
+   xmlReader.setContentHandler(deserHandler);
+   InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
+   try {
+     xmlReader.parse(new InputSource(serCasStream));
+   } finally {
+     // close even when parsing fails so the file handle is not leaked
+     serCasStream.close();
+   }
+
+   // test it
+   assertEquals(CAS.NAME_DEFAULT_SOFA, cas.getSofa().getSofaID());
+
+   // get a v1 XMI version of the same CAS
+   XmiCasDeserializer deser2 = new XmiCasDeserializer(v1cas.getTypeSystem());
+   ContentHandler deserHandler2 = deser2.getXmiCasHandler(v1cas);
+   xmlReader.setContentHandler(deserHandler2);
+   serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/v1xmiCas.xml"));
+   try {
+     xmlReader.parse(new InputSource(serCasStream));
+   } finally {
+     serCasStream.close();
+   }
+
+   // compare
+   assertEquals(cas.getAnnotationIndex().size(), v1cas.getAnnotationIndex().size());
+   assertEquals(CAS.NAME_DEFAULT_SOFA, v1cas.getSofa().getSofaID());
+
+   // now a v1 XMI version of a multiple Sofa CAS
+   v1cas.reset();
+   deser2 = new XmiCasDeserializer(v1cas.getTypeSystem());
+   deserHandler2 = deser2.getXmiCasHandler(v1cas);
+   xmlReader.setContentHandler(deserHandler2);
+   serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/xmiMsCasV1.xml"));
+   try {
+     xmlReader.parse(new InputSource(serCasStream));
+   } finally {
+     serCasStream.close();
+   }
+
+   // test it
+   CAS engView = v1cas.getView("EnglishDocument");
+   assertEquals("this beer is good", engView.getDocumentText());
+   assertEquals(5, engView.getAnnotationIndex().size()); // 4 annots plus documentAnnotation
+   CAS gerView = v1cas.getView("GermanDocument");
+   assertEquals("das bier ist gut", gerView.getDocumentText());
+   assertEquals(5, gerView.getAnnotationIndex().size()); // 4 annots plus documentAnnotation
+   assertEquals(CAS.NAME_DEFAULT_SOFA, v1cas.getSofa().getSofaID());
+   assertEquals("some text for the default text sofa.", v1cas.getDocumentText());
+
+   // reserialize as XMI
+   StringWriter sw = new StringWriter();
+   XMLSerializer xmlSer = new XMLSerializer(sw, false);
+   XmiCasSerializer xmiSer = new XmiCasSerializer(v1cas.getTypeSystem());
+   xmiSer.serialize(v1cas, xmlSer.getContentHandler());
+   String xml = sw.getBuffer().toString();
+
+   cas.reset();
+
+   // deserialize into another CAS
+   deser2 = new XmiCasDeserializer(cas.getTypeSystem());
+   deserHandler2 = deser2.getXmiCasHandler(cas);
+   xmlReader.setContentHandler(deserHandler2);
+   xmlReader.parse(new InputSource(new StringReader(xml)));
+
+   // test it
+   engView = cas.getView("EnglishDocument");
+   assertEquals("this beer is good", engView.getDocumentText());
+   assertEquals(5, engView.getAnnotationIndex().size()); // 4 annots plus documentAnnotation
+   gerView = cas.getView("GermanDocument");
+   assertEquals("das bier ist gut", gerView.getDocumentText());
+   assertEquals(5, gerView.getAnnotationIndex().size()); // 4 annots plus documentAnnotation
+   // check the round-tripped CAS itself; v1cas was already verified before reserializing
+   assertEquals(CAS.NAME_DEFAULT_SOFA, cas.getSofa().getSofaID());
+   assertEquals("some text for the default text sofa.", cas.getDocumentText());
+ }
+
+ /**
+  * Round-trips a CAS through XMI when two annotation types share both their short name
+  * (<code>Foo</code>) and their last namespace segment (<code>foo</code>) but live in different
+  * packages, and checks that the deserialized CAS equals the original.
+  *
+  * @throws Exception
+  *           if CAS creation, serialization or deserialization fails
+  */
+ public void testDuplicateNsPrefixes() throws Exception {
+   // type system with org.bar.foo.Foo and org.baz.foo.Foo, both annotation subtypes
+   TypeSystemDescription tsDesc = new TypeSystemDescription_impl();
+   tsDesc.addType("org.bar.foo.Foo", "", "uima.tcas.Annotation");
+   tsDesc.addType("org.baz.foo.Foo", "", "uima.tcas.Annotation");
+
+   // source CAS: one indexed annotation of each type over the whole document text
+   CAS sourceCas = CasCreationUtils.createCas(tsDesc, null, null);
+   sourceCas.setDocumentText("Foo");
+   Type barFooType = sourceCas.getTypeSystem().getType("org.bar.foo.Foo");
+   Type bazFooType = sourceCas.getTypeSystem().getType("org.baz.foo.Foo");
+   AnnotationFS barFoo = sourceCas.createAnnotation(barFooType, 0, 3);
+   sourceCas.addFsToIndexes(barFoo);
+   AnnotationFS bazFoo = sourceCas.createAnnotation(bazFooType, 0, 3);
+   sourceCas.addFsToIndexes(bazFoo);
+
+   // write the source CAS out as XMI bytes
+   ByteArrayOutputStream xmiOut = new ByteArrayOutputStream();
+   XmiCasSerializer.serialize(sourceCas, xmiOut);
+   xmiOut.close();
+   byte[] xmiBytes = xmiOut.toByteArray();
+
+   // read the bytes back into a fresh CAS built from the same type system description
+   CAS targetCas = CasCreationUtils.createCas(tsDesc, null, null);
+   ByteArrayInputStream xmiIn = new ByteArrayInputStream(xmiBytes);
+   XmiCasDeserializer.deserialize(xmiIn, targetCas);
+   xmiIn.close();
+
+   CasComparer.assertEquals(sourceCas, targetCas);
+ }
+
+ public void testMerging() throws Exception {
+ // deserialize a complex CAS from XCAS
+ CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
+ InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
+ XCASDeserializer.deserialize(serCasStream, cas);
+ serCasStream.close();
+ int numAnnotations = cas.getAnnotationIndex().size(); //for comparison later
+ String docText = cas.getDocumentText(); //for comparison later
+ //add a new Sofa to test that multiple Sofas in original CAS work
+ CAS preexistingView = cas.createView("preexistingView");
+ String preexistingViewText = "John Smith blah blah blah";
+ preexistingView.setDocumentText(preexistingViewText);
+ createPersonAnnot(preexistingView, 0, 10);
+
+ // do XMI serialization to a string, using XmiSerializationSharedData
+ // to keep track of maximum ID generated
+ XmiSerializationSharedData serSharedData = new XmiSerializationSharedData();
+ String xmiStr = serialize(cas, serSharedData);
+ int maxOutgoingXmiId = serSharedData.getMaxXmiId();
+
+ //deserialize into two new CASes, again using XmiSerializationSharedData so
+ //we can get consistent IDs later.
+ CAS newCas1 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
+ XmiSerializationSharedData deserSharedData1 = new XmiSerializationSharedData();
+ deserialize(xmiStr, newCas1, deserSharedData1, false, -1);
+
+ CAS newCas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
+ XmiSerializationSharedData deserSharedData2 = new XmiSerializationSharedData();
+ deserialize(xmiStr, newCas2, deserSharedData2, false, -1);
+
+ //add new FS to each new CAS
+ createPersonAnnot(newCas1, 0, 10);
+ createPersonAnnot(newCas1, 20, 30);
+ createPersonAnnot(newCas2, 40, 50);
+ AnnotationFS person = createPersonAnnot(newCas2, 60, 70);
+
+ //add an Owner relation that points to an organization in the original CAS,
+ //to test links across merge boundary
+ Type orgType = newCas2.getTypeSystem().getType(
+ "org.apache.uima.testTypeSystem.Organization");
+ AnnotationFS org = (AnnotationFS)newCas2.getAnnotationIndex(orgType).iterator().next();
+ Type ownerType = newCas2.getTypeSystem().getType(
+ "org.apache.uima.testTypeSystem.Owner");
+ Feature argsFeat = ownerType.getFeatureByBaseName("relationArgs");
+ Feature componentIdFeat = ownerType.getFeatureByBaseName("componentId");
+ Type relArgsType = newCas2.getTypeSystem().getType(
+ "org.apache.uima.testTypeSystem.BinaryRelationArgs");
+ Feature domainFeat = relArgsType.getFeatureByBaseName("domainValue");
+ Feature rangeFeat = relArgsType.getFeatureByBaseName("rangeValue");
+ AnnotationFS ownerAnnot = newCas2.createAnnotation(ownerType, 0, 70);
+ FeatureStructure relArgs = newCas2.createFS(relArgsType);
+ relArgs.setFeatureValue(domainFeat, person);
+ relArgs.setFeatureValue(rangeFeat, org);
+ ownerAnnot.setFeatureValue(argsFeat, relArgs);
+ ownerAnnot.setStringValue(componentIdFeat, "XCasDeserializerTest");
+ newCas2.addFsToIndexes(ownerAnnot);
+ int orgBegin = org.getBegin();
+ int orgEnd = org.getEnd();
+
+ //add Sofas
+ CAS newView1 = newCas1.createView("newSofa1");
+ final String sofaText1 = "This is a new Sofa, created in CAS 1.";
+ newView1.setDocumentText(sofaText1);
+ final String annotText = "Sofa";
+ int annotStart1 = sofaText1.indexOf(annotText);
+ AnnotationFS annot1 = newView1.createAnnotation(orgType, annotStart1, annotStart1 + annotText.length());
+ newView1.addFsToIndexes(annot1);
+ CAS newView2 = newCas2.createView("newSofa2");
+ final String sofaText2 = "This is another new Sofa, created in CAS 2.";
+ newView2.setDocumentText(sofaText2);
+ int annotStart2 = sofaText2.indexOf(annotText);
+ AnnotationFS annot2 = newView2.createAnnotation(orgType, annotStart2, annotStart2 + annotText.length());
+ newView2.addFsToIndexes(annot2);
+
+ //re-serialize each new CAS back to XMI, keeping consistent ids
+ String newSerCas1 = serialize(newCas1, deserSharedData1);
+ String newSerCas2 = serialize(newCas2, deserSharedData2);
+
+ //merge the two XMI CASes back into the original CAS
+ XmiSerializationSharedData deserSharedData3 = new XmiSerializationSharedData();
+ deserialize(newSerCas1, cas, deserSharedData3, false, -1);
+
+ assertEquals(numAnnotations +2, cas.getAnnotationIndex().size());
+
+ deserialize(newSerCas2, cas, deserSharedData3, false, maxOutgoingXmiId);
+
+
+ assertEquals(numAnnotations + 5, cas.getAnnotationIndex().size());
+
+ assertEquals(docText, cas.getDocumentText());
+
+ // Serialize/deserialize again in case merge created duplicate ids
+ String newSerCasMerged = serialize(cas, deserSharedData3);
+
+ deserialize(newSerCasMerged, cas, deserSharedData3, false, -1);
+
+ //check covered text of annotations
+ FSIterator iter = cas.getAnnotationIndex().iterator();
+ while (iter.hasNext()) {
+ AnnotationFS annot = (AnnotationFS)iter.next();
+ assertEquals(cas.getDocumentText().substring(
+ annot.getBegin(), annot.getEnd()), annot.getCoveredText());
+ }
+ //check Owner annotation we created to test link across merge boundary
+ iter = cas.getAnnotationIndex(ownerType).iterator();
+ while (iter.hasNext()) {
+ AnnotationFS
+ annot = (AnnotationFS)iter.next();
+ String componentId = annot.getStringValue(componentIdFeat);
+ if ("XCasDeserializerTest".equals(componentId)) {
+ FeatureStructure targetRelArgs = annot.getFeatureValue(argsFeat);
+ AnnotationFS targetDomain = (AnnotationFS)targetRelArgs.getFeatureValue(domainFeat);
+ assertEquals(60, targetDomain.getBegin());
+ assertEquals(70, targetDomain.getEnd());
+ AnnotationFS targetRange = (AnnotationFS)targetRelArgs.getFeatureValue(rangeFeat);
+ assertEquals(orgBegin, targetRange.getBegin());
+ assertEquals(orgEnd, targetRange.getEnd());
+ }
+ }
+ //check Sofas
+ CAS targetView1 = cas.getView("newSofa1");
+ assertEquals(sofaText1, targetView1.getDocumentText());
+ CAS targetView2 = cas.getView("newSofa2");
+ assertEquals(sofaText2, targetView2.getDocumentText());
+ AnnotationFS targetAnnot1 = (AnnotationFS)
+ targetView1.getAnnotationIndex(orgType).iterator().get();
+ assertEquals(annotText, targetAnnot1.getCoveredText());
+ AnnotationFS targetAnnot2 = (AnnotationFS)
+ targetView2.getAnnotationIndex(orgType).iterator().get();
+ assertEquals(annotText, targetAnnot2.getCoveredText());
+ assertTrue(targetView1.getSofa().getSofaRef() !=
+ targetView2.getSofa().getSofaRef());
+
+ CAS checkPreexistingView = cas.getView("preexistingView");
+ assertEquals(preexistingViewText, checkPreexistingView.getDocumentText());
+ Type personType = cas.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person");
+ AnnotationFS targetAnnot3 = (AnnotationFS)
+ checkPreexistingView.getAnnotationIndex(personType).iterator().get();
+ assertEquals("John Smith", targetAnnot3.getCoveredText());
+
+ //try an initial CAS that contains multiple Sofas
+
+ }
+
+ /**
+  * Delta-CAS variant of the merge test: two copies of a CAS are extended after a Marker has
+  * been created on each, serialized with that Marker, and the results are deserialized back
+  * into the original CAS.
+  * <p>
+  * Both merges and the final re-serialization reuse the shared data object that was filled
+  * when the original CAS was first serialized, together with the maximum XMI id recorded then.
+  * The checks cover annotation counts, document text, covered text of every annotation, the
+  * Owner relation pointing at a pre-existing Organization, and the three additional views.
+  *
+  * @throws Exception
+  *           if reading the test file or any serialization/deserialization step fails
+  */
+ public void testDeltaCasMerging() throws Exception {
+   // deserialize a complex CAS from XCAS
+   CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
+   InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
+   try {
+     XCASDeserializer.deserialize(serCasStream, cas);
+   } finally {
+     // close even when deserialization fails so the file handle is not leaked
+     serCasStream.close();
+   }
+   int numAnnotations = cas.getAnnotationIndex().size(); //for comparison later
+   String docText = cas.getDocumentText(); //for comparison later
+   //add a new Sofa to test that multiple Sofas in original CAS work
+   CAS preexistingView = cas.createView("preexistingView");
+   String preexistingViewText = "John Smith blah blah blah";
+   preexistingView.setDocumentText(preexistingViewText);
+   createPersonAnnot(preexistingView, 0, 10);
+
+   // do XMI serialization to a string, using XmiSerializationSharedData
+   // to keep track of maximum ID generated
+   XmiSerializationSharedData serSharedData = new XmiSerializationSharedData();
+   String xmiStr = serialize(cas, serSharedData);
+   int maxOutgoingXmiId = serSharedData.getMaxXmiId();
+
+   //deserialize into two new CASes, again using XmiSerializationSharedData so
+   //we can get consistent IDs later.
+   CAS newCas1 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
+   XmiSerializationSharedData deserSharedData1 = new XmiSerializationSharedData();
+   deserialize(xmiStr, newCas1, deserSharedData1, false, -1);
+
+   CAS newCas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
+   XmiSerializationSharedData deserSharedData2 = new XmiSerializationSharedData();
+   deserialize(xmiStr, newCas2, deserSharedData2, false, -1);
+
+   //create Marker before adding new FSs
+   Marker marker1 = newCas1.createMarker();
+   Marker marker2 = newCas2.createMarker();
+
+   //add new FS to each new CAS
+   createPersonAnnot(newCas1, 0, 10);
+   createPersonAnnot(newCas1, 20, 30);
+   createPersonAnnot(newCas2, 40, 50);
+   AnnotationFS person = createPersonAnnot(newCas2, 60, 70);
+
+   //add an Owner relation that points to an organization in the original CAS,
+   //to test links across merge boundary
+   Type orgType = newCas2.getTypeSystem().getType(
+       "org.apache.uima.testTypeSystem.Organization");
+   AnnotationFS org = (AnnotationFS)newCas2.getAnnotationIndex(orgType).iterator().next();
+   Type ownerType = newCas2.getTypeSystem().getType(
+       "org.apache.uima.testTypeSystem.Owner");
+   Feature argsFeat = ownerType.getFeatureByBaseName("relationArgs");
+   Feature componentIdFeat = ownerType.getFeatureByBaseName("componentId");
+   Type relArgsType = newCas2.getTypeSystem().getType(
+       "org.apache.uima.testTypeSystem.BinaryRelationArgs");
+   Feature domainFeat = relArgsType.getFeatureByBaseName("domainValue");
+   Feature rangeFeat = relArgsType.getFeatureByBaseName("rangeValue");
+   AnnotationFS ownerAnnot = newCas2.createAnnotation(ownerType, 0, 70);
+   FeatureStructure relArgs = newCas2.createFS(relArgsType);
+   relArgs.setFeatureValue(domainFeat, person);
+   relArgs.setFeatureValue(rangeFeat, org);
+   ownerAnnot.setFeatureValue(argsFeat, relArgs);
+   // this component id is the marker used below to find the Owner annotation again
+   ownerAnnot.setStringValue(componentIdFeat, "XCasDeserializerTest");
+   newCas2.addFsToIndexes(ownerAnnot);
+   int orgBegin = org.getBegin();
+   int orgEnd = org.getEnd();
+
+   //add Sofas
+   CAS newView1 = newCas1.createView("newSofa1");
+   final String sofaText1 = "This is a new Sofa, created in CAS 1.";
+   newView1.setDocumentText(sofaText1);
+   final String annotText = "Sofa";
+   int annotStart1 = sofaText1.indexOf(annotText);
+   AnnotationFS annot1 = newView1.createAnnotation(orgType, annotStart1, annotStart1 + annotText.length());
+   newView1.addFsToIndexes(annot1);
+   CAS newView2 = newCas2.createView("newSofa2");
+   final String sofaText2 = "This is another new Sofa, created in CAS 2.";
+   newView2.setDocumentText(sofaText2);
+   int annotStart2 = sofaText2.indexOf(annotText);
+   AnnotationFS annot2 = newView2.createAnnotation(orgType, annotStart2, annotStart2 + annotText.length());
+   newView2.addFsToIndexes(annot2);
+
+   //re-serialize each new CAS back to Delta XMI, keeping consistent ids
+   String newSerCas1 = serialize(newCas1, deserSharedData1, marker1);
+   String newSerCas2 = serialize(newCas2, deserSharedData2, marker2);
+
+   //merge the two XMI CASes back into the original CAS
+   // both merges reuse serSharedData from the original serialization of cas
+   // NOTE(review): the last argument is presumably the merge point; confirm against the
+   // deserialize helper
+   deserialize(newSerCas1, cas, serSharedData, false, maxOutgoingXmiId);
+
+   // expected growth: the two createPersonAnnot calls made on newCas1
+   assertEquals(numAnnotations +2, cas.getAnnotationIndex().size());
+
+   deserialize(newSerCas2, cas, serSharedData, false, maxOutgoingXmiId);
+
+   // expected growth: the two from newCas1 plus two Persons and one Owner from newCas2
+   assertEquals(numAnnotations + 5, cas.getAnnotationIndex().size());
+
+   assertEquals(docText, cas.getDocumentText());
+
+   // Serialize/deserialize again in case merge created duplicate ids
+   String newSerCasMerged = serialize(cas, serSharedData);
+   deserialize(newSerCasMerged, cas, serSharedData, false, -1);
+
+   //check covered text of annotations
+   FSIterator iter = cas.getAnnotationIndex().iterator();
+   while (iter.hasNext()) {
+     AnnotationFS annot = (AnnotationFS)iter.next();
+     assertEquals(cas.getDocumentText().substring(
+         annot.getBegin(), annot.getEnd()), annot.getCoveredText());
+   }
+   //check Owner annotation we created to test link across merge boundary
+   iter = cas.getAnnotationIndex(ownerType).iterator();
+   while (iter.hasNext()) {
+     AnnotationFS annot = (AnnotationFS)iter.next();
+     String componentId = annot.getStringValue(componentIdFeat);
+     if ("XCasDeserializerTest".equals(componentId)) {
+       FeatureStructure targetRelArgs = annot.getFeatureValue(argsFeat);
+       AnnotationFS targetDomain = (AnnotationFS)targetRelArgs.getFeatureValue(domainFeat);
+       assertEquals(60, targetDomain.getBegin());
+       assertEquals(70, targetDomain.getEnd());
+       AnnotationFS targetRange = (AnnotationFS)targetRelArgs.getFeatureValue(rangeFeat);
+       assertEquals(orgBegin, targetRange.getBegin());
+       assertEquals(orgEnd, targetRange.getEnd());
+     }
+   }
+   //check Sofas
+   CAS targetView1 = cas.getView("newSofa1");
+   assertEquals(sofaText1, targetView1.getDocumentText());
+   CAS targetView2 = cas.getView("newSofa2");
+   assertEquals(sofaText2, targetView2.getDocumentText());
+   AnnotationFS targetAnnot1 = (AnnotationFS)
+       targetView1.getAnnotationIndex(orgType).iterator().get();
+   assertEquals(annotText, targetAnnot1.getCoveredText());
+   AnnotationFS targetAnnot2 = (AnnotationFS)
+       targetView2.getAnnotationIndex(orgType).iterator().get();
+   assertEquals(annotText, targetAnnot2.getCoveredText());
+   // the two merged-in Sofas must not have collapsed into one
+   assertTrue(targetView1.getSofa().getSofaRef() !=
+       targetView2.getSofa().getSofaRef());
+
+   CAS checkPreexistingView = cas.getView("preexistingView");
+   assertEquals(preexistingViewText, checkPreexistingView.getDocumentText());
+   Type personType = cas.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person");
+   AnnotationFS targetAnnot3 = (AnnotationFS)
+       checkPreexistingView.getAnnotationIndex(personType).iterator().get();
+   assertEquals("John Smith", targetAnnot3.getCoveredText());
+
+   //try an initial CAS that contains multiple Sofas
+ }
+
+ public void testDeltaCasIgnorePreexistingFS() throws Exception {
+ try {
+ CAS cas1 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
+ indexes);
+ CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
+ indexes);
+ cas1.setDocumentText("This is a test document in the initial view");
+ AnnotationFS anAnnot1 = cas1.createAnnotation(cas1.getAnnotationType(), 0, 4);
+ cas1.getIndexRepository().addFS(anAnnot1);
+ AnnotationFS anAnnot2 = cas1.createAnnotation(cas1.getAnnotationType(), 5, 10);
+ cas1.getIndexRepository().addFS(anAnnot2);
+ FSIndex tIndex = cas1.getAnnotationIndex();
+ assertTrue(tIndex.size() == 3); //doc annot plus annots
+
+ //serialize complete
[... 743 lines stripped ...]