You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ea...@apache.org on 2007/02/03 17:54:11 UTC

svn commit: r503248 [7/8] - /incubator/uima/uimacpp/trunk/src/cas/

Added: incubator/uima/uimacpp/trunk/src/cas/xcasdeserializer_handler.cpp
URL: http://svn.apache.org/viewvc/incubator/uima/uimacpp/trunk/src/cas/xcasdeserializer_handler.cpp?view=auto&rev=503248
==============================================================================
--- incubator/uima/uimacpp/trunk/src/cas/xcasdeserializer_handler.cpp (added)
+++ incubator/uima/uimacpp/trunk/src/cas/xcasdeserializer_handler.cpp Sat Feb  3 08:54:09 2007
@@ -0,0 +1,975 @@
+/** @name xcasdeserializer_handler.cpp
+-----------------------------------------------------------------------------
+
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+
+-----------------------------------------------------------------------------
+
+
+   10/18/2005  Initial creation
+
+-------------------------------------------------------------------------- */
+
+//TODO support multiple indexed FS
+
+// ---------------------------------------------------------------------------
+//  Includes
+// ---------------------------------------------------------------------------
+
+#include "uima/pragmas.hpp"
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+using namespace std;
+
+#include "xercesc/sax2/Attributes.hpp"
+#include "xercesc/sax/SAXParseException.hpp"
+#include "xercesc/sax/SAXException.hpp"
+#include "uima/msg.h"
+#include "uima/exceptions.hpp"
+#include "uima/lowlevel_typesystem.hpp"
+#include "uima/lowlevel_indexrepository.hpp"
+
+#include "uima/xcasdeserializer_handler.hpp"
+#include "uima/internal_fspromoter.hpp"
+#include "uima/internal_typeshortcuts.hpp"
+#include "uima/internal_casimpl.hpp"
+#include "uima/fsindexrepository.hpp"
+#include "uima/arrayfs.hpp"
+#include "uima/annotator_context.hpp"
+#include "uima/resmgr.hpp"
+
+
+namespace uima {
+
+// ---------------------------------------------------------------------------
+//  XCASDeserializerHandler: Constructors and Destructor
+// ---------------------------------------------------------------------------
+
+  /**
+   * Sets up a handler that deserializes into the base CAS of <code>cas</code>.
+   * The context pointer <code>ctx</code> is stored as passed in.
+   *
+   * The bookkeeping vectors are seeded here:
+   *  - indexRepositories: [0] base CAS repository, [1] repository of the initial view
+   *  - sofaRefMap:        [0] entry for the base CAS
+   *  - indexMap:          [0] entry for the base CAS
+   */
+  XCASDeserializerHandler::XCASDeserializerHandler(CAS & cas, AnnotatorContext * const ctx)
+      : iv_cas(cas.getBaseCas()),
+      iv_locator(NULL),
+      iv_ctx(ctx),
+      iv_casimpl(uima::internal::CASImpl::promoteCAS(*iv_cas)) {
+
+    currentContentFeat.append(DEFAULT_CONTENT_FEATURE);
+    sofaTypeCode = uima::internal::gs_tySofaType;
+
+    // Entry for the base CAS: non-compliant annotations are pointed at the first Sofa ...
+    sofaRefMap.push_back(1);
+    // ... and _indexed=0 stays in 0.
+    indexMap.push_back(0);
+
+    // Repository 0 belongs to the base CAS.
+    FSIndexRepository * baseRepository = &iv_cas->getBaseIndexRepository();
+    indexRepositories.push_back((lowlevel::IndexRepository*)baseRepository);
+    // There should always be another repository for the Initial View.
+    FSIndexRepository * initialViewRepository = &iv_cas->getView(CAS::NAME_DEFAULT_SOFA)->getIndexRepository();
+    indexRepositories.push_back((lowlevel::IndexRepository*)initialViewRepository);
+
+    // Remember the type system; it is used to check whether an FS is an annotation.
+    lowlevel::FSHeap const & heap = iv_casimpl.getHeap();
+    iv_typesystem = &heap.getTypeSystem();
+  }
+
+  /**
+   * Releases every FSInfo record (and the index list it points to) that was
+   * registered during parsing, then empties the bookkeeping containers.
+   */
+  XCASDeserializerHandler::~XCASDeserializerHandler()   {
+    // Records that were registered under an id.
+    // NOTE(review): size() is deliberately re-read on every pass; if fsTree is an
+    // associative container, operator[] can grow it - confirm before hoisting.
+    for (size_t pos = 0; pos < fsTree.size(); ++pos) {
+      FSInfo * record = (FSInfo*) fsTree[pos];
+      if (record == 0) {
+        continue;
+      }
+      delete record->indexRep;
+      delete record;
+    }
+
+    // Records that came without an id.
+    for (size_t pos = 0; pos < idLess.size(); ++pos) {
+      FSInfo * record = (FSInfo*) idLess[pos];
+      if (record == 0) {
+        continue;
+      }
+      delete record->indexRep;
+      delete record;
+    }
+
+    // free some storage
+    fsTree.clear();
+    sofaRefMap.clear();
+    indexMap.clear();
+  }
+
+
+// ---------------------------------------------------------------------------
+//  XCASDeserializerHandler: Implementation of the SAX2 ContentHandler interface
+// ---------------------------------------------------------------------------
+
+  /** SAX2 callback: remembers the locator handed over by the parser. */
+  void  XCASDeserializerHandler::setDocumentLocator(const Locator* const locator) {
+    this->iv_locator = locator;
+  }
+
+  /** SAX2 callback: puts the state machine into its document-level start state. */
+  void XCASDeserializerHandler::startDocument() {
+    this->iv_state = DOC_STATE;
+  }
+
+  /**
+   * SAX2 callback for an opening tag.
+   *
+   * Clears the character buffer and dispatches on the current parser state:
+   *  - DOC_STATE:       the element must be the CAS tag, then FS elements are expected
+   *  - FS_STATE:        either the document text element or a feature structure
+   *  - ARRAY_ELE_STATE: an array element
+   * In any other state, or for a wrong outermost tag, an ExcIllFormedInputError
+   * carrying the element name is thrown.
+   */
+  void XCASDeserializerHandler::startElement(const   XMLCh* const    uri,
+      const   XMLCh* const    localname,
+      const   XMLCh* const    qname,
+      const Attributes & attrs) {
+    assert(sizeof(XMLCh) == sizeof(UChar));
+
+    UnicodeString qualifiedName( (UChar const *) qname, XMLString::stringLen(qname));
+    buffer.remove();
+
+    bool unexpectedElement = false;
+
+    if (iv_state == DOC_STATE) {
+      if (qualifiedName.compare(CASTAGNAME) == 0) {
+        iv_state = FS_STATE;
+      } else {
+        unexpectedElement = true;
+      }
+    } else if (iv_state == FS_STATE) {
+      currentContentFeat = DEFAULT_CONTENT_FEATURE;
+      if (qualifiedName.compare(DEFAULT_DOC_TYPE_NAME) == 0) {
+        iv_state = DOC_TEXT_STATE;
+      } else {
+        readFS(qualifiedName, attrs);
+      }
+    } else if (iv_state == ARRAY_ELE_STATE) {
+      readArrayElement(qualifiedName, attrs);
+    } else {
+      // If we're not in an element expecting state, raise an error.
+      unexpectedElement = true;
+    }
+
+    if (!unexpectedElement) {
+      return;
+    }
+
+    ErrorInfo errInfo;
+    errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+    ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
+    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+    msg.addParam( qualifiedName );
+    errInfo.setMessage(msg);
+    errInfo.setSeverity(ErrorInfo::unrecoverable);
+    ExcIllFormedInputError exc(errInfo);
+    throw exc;
+  }
+
+  /**
+   * SAX2 callback for character data. The text is appended to the buffer only
+   * while the parser is in one of the content-collecting states; everywhere
+   * else it is dropped.
+   */
+  void XCASDeserializerHandler::characters(
+    const   XMLCh* const  cpwsz,
+    const unsigned int    uiLength) {
+
+    assert(sizeof(XMLCh) == sizeof(UChar));
+
+    const bool collecting = this->iv_state == DOC_TEXT_STATE
+                            || this->iv_state == CONTENT_STATE
+                            || this->iv_state == ARRAY_ELE_CONTENT_STATE
+                            || this->iv_state == FEAT_CONTENT_STATE;
+    if (collecting) {
+      buffer.append( (UChar const *) cpwsz, 0, uiLength );
+    }
+  }
+
+  /**
+   * SAX2 callback for a closing tag. Consumes the buffered text where the
+   * current state calls for it and moves the state machine one level up.
+   */
+  void XCASDeserializerHandler::endElement(const XMLCh* const uri,
+      const XMLCh* const localname,
+      const XMLCh* const qname) {
+    UnicodeString qualifiedName( (UChar const *) qname, XMLString::stringLen(qname));
+    assert(sizeof(XMLCh) == sizeof(UChar));
+
+    if (iv_state == DOC_STATE) {
+      // Do nothing.
+    } else if (iv_state == FS_STATE) {
+      iv_state = DOC_STATE;
+    } else if (iv_state == FEAT_STATE) {
+      iv_state = FS_STATE;
+    } else if (iv_state == CONTENT_STATE) {
+      // The element text becomes the value of the content feature.
+      handleFeature(currentAddr, currentContentFeat, buffer, true);
+      iv_state = FS_STATE;
+    } else if (iv_state == FEAT_CONTENT_STATE) {
+      // The element text becomes the value of the feature named like the element.
+      handleFeature(currentAddr, qualifiedName, buffer, false);
+      iv_state = FEAT_STATE;
+    } else if (iv_state == ARRAY_ELE_CONTENT_STATE) {
+      // The element text becomes the next array value.
+      addArrayElement(buffer);
+      iv_state = ARRAY_ELE_STATE;
+    } else if (iv_state == ARRAY_ELE_STATE) {
+      iv_state = FS_STATE;
+    } else if (iv_state == DOC_TEXT_STATE) {
+      // Assume old style TCAS with one text Sofa
+      SofaFS newSofa = iv_cas->createInitialSofa(UnicodeString("text"));
+      CAS* initialView = iv_cas->getInitialView();
+      initialView->registerView(newSofa);
+      // Set the document text without creating a documentAnnotation
+      initialView->setDocTextFromDeserializtion(UnicodeStringRef(buffer.getBuffer(), buffer.length()));
+
+      // and assume the new Sofa is at location 1!
+      int sofaAddr = 1;
+      int sofaId = 1;
+      sofaRefMap.push_back(sofaId);
+
+      // and register the id for this Sofa (empty index list)
+      FSInfo * sofaInfo = new FSInfo(sofaAddr, new vector<int>);
+      fsTree[sofaId] = sofaInfo;
+
+      iv_state = FS_STATE;
+    }
+  }
+
+
+  /**
+   * SAX2 callback at the end of the document. Runs finalizeFS() over every
+   * recorded feature structure (with and without id) and then asks each view
+   * that was collected in tcasInstances to pick up its document annotation.
+   */
+  void XCASDeserializerHandler::endDocument() {
+
+    // update features that are FSs: structures registered under an id
+    // NOTE(review): size() is re-read each pass on purpose; finalizeFS() indexes
+    // into fsTree - confirm before hoisting.
+    for (size_t pos = 0; pos < fsTree.size(); ++pos) {
+      FSInfo * record = (FSInfo*) fsTree[pos];
+      if (record == 0) {
+        continue;
+      }
+      finalizeFS(*record);
+    }
+
+    // update features that are FSs: structures without an id
+    for (size_t pos = 0; pos < idLess.size(); ++pos) {
+      FSInfo * record = (FSInfo*) idLess[pos];
+      if (record == 0) {
+        continue;
+      }
+      finalizeFS(*record);
+    }
+
+    // update document annotation info in tcas
+    for (size_t pos = 0; pos < tcasInstances.size(); ++pos) {
+      CAS * view = (CAS *) tcasInstances[pos];
+      if (view == 0) {
+        continue;
+      }
+      view->pickupDocumentAnnotation();
+    }
+
+  }
+
+
+  /**
+   * SAX2 callback for ignorable whitespace. Nothing needs to be done with it.
+   *
+   * The debug trace is disabled like the traces in the other callbacks; a
+   * library handler should not write to stdout for every whitespace run.
+   */
+  void XCASDeserializerHandler::ignorableWhitespace(const  XMLCh* const cpwsz,
+      const unsigned int length) {
+    //cout << " XCASDeserializerHandler::ignorableWhitespace() " << endl;
+  }
+
+
+
+  /**
+   * Creates a new FS for the element named <code>qualifiedName</code>.
+   *
+   * The element name is looked up as a type name. An unknown type is reported
+   * on stdout and the parser switches to CONTENT_STATE; an array type is handed
+   * to readArray(); any other type gets a fresh FS that is filled by the
+   * address-based readFS() overload.
+   */
+  void XCASDeserializerHandler::readFS(UnicodeString & qualifiedName, const Attributes & attrs) {
+    Type type = iv_cas->getTypeSystem().getType(qualifiedName);
+    uima::lowlevel::TyFSType typecode =  uima::internal::FSPromoter::demoteType(type);
+
+    if (!type.isValid() ) {
+      cout << "INFO: invalid type " << qualifiedName << endl;
+      iv_state = CONTENT_STATE;
+      return;
+    }
+
+    if (iv_cas->getTypeSystem().isArrayType(typecode)) {
+      readArray(type, attrs);
+      return;
+    }
+
+    uima::lowlevel::TyFS newAddr = uima::internal::FSPromoter::demoteFS(iv_cas->createFS(type));
+    readFS(newAddr, attrs, true);
+  }
+
+  /**
+   * Fills the feature structure at <code>addr</code> from the attributes of its
+   * XML element and records it (FSInfo) for finalization at endDocument().
+   *
+   * Attributes starting with "_" are treated specially: the id attribute, the
+   * content-feature attribute and the _indexed attribute are consumed here, all
+   * others go to handleFeature(). When the FS is a Sofa, the sofaNum/index
+   * mappings are updated first and a view is obtained for it afterwards.
+   * On return the parser is in CONTENT_STATE.
+   *
+   * @param addr     heap address of the FS to fill
+   * @param attrs    attributes of the element being read
+   * @param toIndex  currently not consulted
+   * @throws ExcIllFormedInputError if a Sofa carries a negative sofaNum
+   */
+  void XCASDeserializerHandler::readFS(lowlevel::TyFS addr, const Attributes  & attrs, bool toIndex) {
+    // Hang on address for setting content feature
+    currentAddr = addr;
+
+    int id = -1;
+    // Index repositories this FS belongs to (0 ==> baseCas indexRepository).
+    // Collected by value so that nothing leaks if attribute handling throws;
+    // the heap copy for FSInfo is only made at the very end.
+    vector<int> indexedViews;
+    UnicodeString attrName;
+    UnicodeString attrValue;
+    bool nameMapping = false;   // Sofa name mapping is switched off for now
+    UChar ubuff[256];
+    UErrorCode errorCode = U_ZERO_ERROR;
+    lowlevel::TyFS heapValue = iv_casimpl.getHeap().getType(addr);
+
+    // Special handling for Sofas
+    if (sofaTypeCode == heapValue) {
+      // create some maps to handle v1 format XCAS ...
+      // ... where the sofa feature of annotations was an int not a ref
+
+      // determine if this is the one and only initial view Sofa
+      bool isInitialView = false;
+      int extsz = UnicodeString(CAS::FEATURE_BASE_NAME_SOFAID).extract(ubuff, 256, errorCode);
+      if (extsz > 256) {
+        cout << "ACK!" << endl;
+      }
+      // NOTE(review): getValue() results are used without a NULL check here and
+      // below - confirm that UnicodeStringRef tolerates a missing attribute.
+      const UChar* sofaID = attrs.getValue(ubuff);
+      if (0==UnicodeStringRef(sofaID).compare(UnicodeString("_DefaultTextSofaName"))) {
+        sofaID = ubuff;
+      }
+      if (0==UnicodeStringRef(sofaID).compare(UnicodeString(CAS::NAME_DEFAULT_SOFA))) {
+        isInitialView = true;
+      }
+      // get the sofaNum
+      UnicodeString(CAS::FEATURE_BASE_NAME_SOFANUM).extract(ubuff, 256, errorCode);
+      const UChar* aString = attrs.getValue(ubuff);
+      int thisSofaNum = atoi(UnicodeStringRef(aString).asUTF8().c_str());
+
+      // get the sofa's FeatureStructure id
+      UnicodeString(ID_ATTR_NAME).extract(ubuff,256, errorCode);
+      aString = attrs.getValue(ubuff);
+      int sofaFsId = atoi(UnicodeStringRef(aString).asUTF8().c_str());
+
+      // A negative sofaNum would be used as a vector index further down.
+      if (thisSofaNum < 0) {
+        ErrorInfo errInfo;
+        errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+        ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
+        msg.addParam("Invalid sofaNum in the CAS" );
+        errInfo.setMessage(msg);
+        errInfo.setSeverity(ErrorInfo::unrecoverable);
+        ExcIllFormedInputError exc(errInfo);
+        throw exc;
+      }
+
+      // for v1 and v2 formats, create the index map
+      // ***we assume Sofas are always received in Sofanum order***
+      // Two scenarios ... the initial view is the first sofa, or not.
+      // If not, the _indexed values need to be remapped to leave room for the initial view,
+      // which may or may not be in the received CAS.
+      if (indexMap.size() == 1) {
+        if (isInitialView) {
+          // the first Sofa an initial view
+          if (thisSofaNum == 2) {
+            // this sofa was mapped to the initial view
+            indexMap.push_back(-1); // for this CAS, there should not be a sofanum = 1
+            indexMap.push_back(1); // map 2 to 1
+            nextIndex = 2;
+          } else {
+            indexMap.push_back(1);
+            nextIndex = 2;
+          }
+        } else {
+          if (thisSofaNum > 1) {
+            // the first Sofa not initial, but sofaNum > 1
+            // must be a v2 format, and sofaNum better be 2
+            indexMap.push_back(1);
+            assert (thisSofaNum == 2);
+            indexMap.push_back(2);
+            nextIndex = 3;
+          } else {
+            // must be v1 format
+            indexMap.push_back(2);
+            nextIndex = 3;
+          }
+        }
+      } else {
+        // if the new Sofa is the initial view, always map to 1
+        if (isInitialView) {
+          // the initial view is not the first
+          // if v2 format, space already reserved in mapping
+          if (indexMap.size() == (size_t) thisSofaNum) {
+            // v1 format, add mapping for initial view
+            indexMap.push_back(1);
+          }
+        } else {
+          indexMap.push_back(nextIndex);
+          nextIndex++;
+        }
+      }
+
+      // Now update the mapping from annotation int to ref values
+      if (sofaRefMap.size() == (size_t) thisSofaNum) {
+        // Sofa received in sofaNum order, add new one
+        sofaRefMap.push_back(sofaFsId);
+      } else if (sofaRefMap.size() > (size_t) thisSofaNum) {
+        // new Sofa has lower sofaNum than last one
+        sofaRefMap[thisSofaNum] =  sofaFsId;
+      } else {
+        // new Sofa has skipped ahead more than 1
+        sofaRefMap.resize(thisSofaNum + 1);
+        sofaRefMap[thisSofaNum] = sofaFsId;
+      }
+    }
+
+    Type type = uima::internal::FSPromoter::promoteType(heapValue, iv_cas->getTypeSystem().getLowlevelTypeSystem());
+
+    for (size_t i = 0; i < attrs.getLength(); i++) {
+      assertWithMsg( sizeof(XMLCh) == sizeof(UChar), "Port required!");
+      attrName = (UChar*)attrs.getQName(i);
+      attrValue = (UChar*)attrs.getValue(i);
+      if (attrName.startsWith("_")) {
+        if (attrName.compare(ID_ATTR_NAME) == 0) {
+          id = atoi(UnicodeStringRef(attrValue).asUTF8().c_str());
+        } else if (attrName.compare(CONTENT_ATTR_NAME) == 0) {
+          currentContentFeat = attrValue;
+        } else if (attrName.compare(INDEXED_ATTR_NAME)== 0) {
+          // Blank-separated list of index repository numbers. Tokenized on a
+          // std::string so that an arbitrarily long attribute cannot overrun a
+          // fixed-size buffer; empty tokens (runs of blanks) are skipped.
+          istringstream indexList(UnicodeStringRef(attrValue).asUTF8());
+          string token;
+          while (getline(indexList, token, ' ')) {
+            if (!token.empty()) {
+              indexedViews.push_back(atoi(token.c_str()));
+            }
+          }
+        } else {
+          handleFeature(type, addr, attrName, attrValue, false);
+        }
+      } else {
+        if (nameMapping && attrName.compare(CAS::FEATURE_BASE_NAME_SOFAID) == 0) {
+          if (iv_ctx != NULL) {
+            attrValue = iv_ctx->mapToSofaID(attrValue).getSofaId();
+          }
+        }
+        handleFeature(type, addr, attrName, attrValue, false);
+      }
+    }
+    if (sofaTypeCode == heapValue) {
+      // If a Sofa, create CAS view to get new indexRepository
+      SofaFS sofa = (SofaFS) uima::internal::FSPromoter::promoteFS(addr, *iv_cas);
+      //also add to indexes so we can retrieve the Sofa later
+      iv_cas->getBaseIndexRepository().addFS(sofa);
+      CAS * tcas = iv_cas->getView(sofa);
+      assert ( EXISTS(tcas) );
+      if (sofa.getSofaRef() == 1) {
+        iv_cas->registerInitialSofa();
+      } else {
+        // add indexRepo for views other than the initial view
+        lowlevel::IndexRepository * indexRep = iv_cas->getIndexRepositoryForSofa(sofa);
+        assert ( EXISTS(indexRep) );
+        indexRepositories.push_back(indexRep);
+      }
+      tcasInstances.push_back(tcas);
+    }
+
+    // an empty index list means not indexed
+    FSInfo * fsInfo = new FSInfo(addr, new vector<int>(indexedViews));
+    if (id < 0) {
+      idLess.push_back(fsInfo);
+    } else {
+      fsTree[id] =  fsInfo;
+    }
+    iv_state = CONTENT_STATE;
+
+  }
+
+  /**
+   * Starts reading an array FS of the given type.
+   *
+   * Only the id, size and _indexed attributes are accepted. The array is
+   * created on the heap with the announced size, becomes the current FS, and
+   * the parser switches to ARRAY_ELE_STATE so that the elements can follow.
+   *
+   * @param type   array type of the element being read
+   * @param attrs  attributes of the element being read
+   * @throws ExcIllFormedInputError on any other attribute or a negative size
+   */
+  void XCASDeserializerHandler::readArray(Type & type, const Attributes  & attrs) {
+
+    // Collected by value so that the error paths below cannot leak it; the
+    // heap copy for FSInfo is only made once the array exists.
+    vector<int> indexedViews;
+    int id = -1;
+    int size=0;
+    UnicodeString attrName;
+    UnicodeString attrValue;
+
+    for (size_t i = 0; i < attrs.getLength(); i++) {
+      assertWithMsg( sizeof(XMLCh) == sizeof(UChar), "Port required!");
+      attrName = (UChar*)attrs.getQName(i);
+      attrValue = (UChar*)attrs.getValue(i);
+
+      if (attrName.compare(ID_ATTR_NAME) == 0) {
+        id = atoi(UnicodeStringRef(attrValue).asUTF8().c_str());
+      } else if (attrName.compare(ARRAY_SIZE_ATTR) == 0) {
+        size = atoi(UnicodeStringRef(attrValue).asUTF8().c_str());
+      } else if (attrName.compare(INDEXED_ATTR_NAME)== 0) {
+        // Blank-separated list of index repository numbers. Tokenized on a
+        // std::string so that an arbitrarily long attribute cannot overrun a
+        // fixed-size buffer; empty tokens (runs of blanks) are skipped.
+        istringstream indexList(UnicodeStringRef(attrValue).asUTF8());
+        string token;
+        while (getline(indexList, token, ' ')) {
+          if (!token.empty()) {
+            indexedViews.push_back(atoi(token.c_str()));
+          }
+        }
+      } else {
+        ErrorInfo errInfo;
+        errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+        ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
+        assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+        msg.addParam( attrName );
+        msg.addParam( attrValue );
+        errInfo.setMessage(msg);
+        errInfo.setSeverity(ErrorInfo::unrecoverable);
+        ExcIllFormedInputError exc(errInfo);
+        throw exc;
+      }
+    }
+
+    // The size comes straight from the input; do not create an array from a negative one.
+    if (size < 0) {
+      ErrorInfo errInfo;
+      errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+      ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
+      msg.addParam("Invalid array FS in the CAS" );
+      errInfo.setMessage(msg);
+      errInfo.setSeverity(ErrorInfo::unrecoverable);
+      ExcIllFormedInputError exc(errInfo);
+      throw exc;
+    }
+
+    arrayType = uima::internal::FSPromoter::demoteType(type);
+    currentAddr = iv_casimpl.getHeap().createArrayFS(arrayType, size);
+
+    arrayPos=0;
+
+    // an empty index list means not indexed
+    FSInfo * fsInfo = new FSInfo(currentAddr, new vector<int>(indexedViews));
+    if (id < 0) {
+      idLess.push_back(fsInfo);
+    } else {
+      fsTree[id] =  fsInfo;
+    }
+    iv_state = ARRAY_ELE_STATE;
+
+  }
+
+  /**
+   * Validates the opening tag of an array element: it must be the array
+   * element tag and must not carry attributes. On success the parser switches
+   * to ARRAY_ELE_CONTENT_STATE; otherwise an ExcIllFormedInputError carrying
+   * the element name is thrown.
+   */
+  void XCASDeserializerHandler::readArrayElement(UnicodeString & qualifiedName, const Attributes & attrs) {
+    const bool wrongTag = qualifiedName.compare(ARRAY_ELEMENT_TAG) != 0;
+    // attributes are only inspected when the tag itself is the expected one
+    if (wrongTag || attrs.getLength() > 0) {
+      ErrorInfo errInfo;
+      errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+      ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
+      assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+      msg.addParam( qualifiedName );
+      errInfo.setMessage(msg);
+      errInfo.setSeverity(ErrorInfo::unrecoverable);
+      ExcIllFormedInputError exc(errInfo);
+      throw exc;
+    }
+    iv_state = ARRAY_ELE_CONTENT_STATE;
+  }
+
+  /**
+   * Stores the text of one array element at position arrayPos of the array
+   * currently being read (currentAddr / arrayType) and advances arrayPos.
+   *
+   * The text is converted according to the array type. For arrays of FSs the
+   * parsed number is stored as is.
+   *
+   * @param buffer  text content of the array element
+   * @throws ExcIllFormedInputError if more elements arrive than the array holds
+   */
+  void XCASDeserializerHandler::addArrayElement(UnicodeString & buffer) {
+
+    if (arrayPos >= iv_casimpl.getHeap().getArraySize(currentAddr) ) {
+      ErrorInfo errInfo;
+      errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+      ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
+      assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+      msg.addParam("Invalid array FS in the CAS" );
+      errInfo.setMessage(msg);
+      errInfo.setSeverity(ErrorInfo::unrecoverable);
+      ExcIllFormedInputError exc(errInfo);
+      throw exc;
+    }
+
+    FeatureStructure fs = uima::internal::FSPromoter::promoteFS(currentAddr, *iv_cas);
+
+    switch (arrayType) {
+    case internal::gs_tyIntArrayType: {
+        int val = atoi(UnicodeStringRef(buffer).asUTF8().c_str());
+        IntArrayFS intFS(fs);
+        intFS.set( (size_t) arrayPos, val);
+        break;
+      }
+    case internal::gs_tyFloatArrayType: {
+        float val = atof(UnicodeStringRef(buffer).asUTF8().c_str());
+        FloatArrayFS floatFS(fs);
+        floatFS.set( (size_t) arrayPos, val);
+        break;
+      }
+    case internal::gs_tyStringArrayType: {
+        //add the string
+        int stringoffset = iv_cas->getHeap()->addString(buffer);
+        //set the array value in fs heap
+        lowlevel::TyFS  stringref =  iv_cas->getHeap()->getStringAsFS(stringoffset);
+        lowlevel::TyHeapCell * fsarray = iv_cas->getHeap()->getCArrayFromFS(currentAddr);
+        fsarray[arrayPos] = stringref;
+        break;
+      }
+    case internal::gs_tyByteArrayType: {
+        // the byte is serialized as its numeric value
+        short intval = atoi(UnicodeStringRef(buffer).asUTF8().c_str());
+        ByteArrayFS byteFS(fs);
+        byteFS.set( (size_t) arrayPos, static_cast<char>(intval));
+        break;
+      }
+    case internal::gs_tyBooleanArrayType: {
+        // only the text "1" counts as true
+        string val = UnicodeStringRef(buffer).asUTF8();
+        BooleanArrayFS booleanFS(fs);
+        booleanFS.set( (size_t) arrayPos, val.compare("1")==0);
+        break;
+      }
+    case internal::gs_tyShortArrayType: {
+        // zero-initialized so that an empty or unparsable element stores 0
+        // instead of an indeterminate value
+        short val = 0;
+        string strval;
+        UnicodeStringRef(buffer).extractUTF8(strval);
+        stringstream s;
+        s << strval.c_str();
+        s >> val;
+        ShortArrayFS shortFS(fs);
+        shortFS.set( (size_t) arrayPos, val);
+        break;
+      }
+    case internal::gs_tyLongArrayType: {
+        INT64 val = 0;   // see short case
+        stringstream s;
+        s << UnicodeStringRef(buffer).asUTF8();
+        s >> val;
+        LongArrayFS longFS(fs);
+        longFS.set( (size_t) arrayPos, val);
+        break;
+      }
+    case internal::gs_tyDoubleArrayType: {
+        DoubleArrayFS doubleFS(fs);
+        stringstream s;
+        s << UnicodeStringRef(buffer).asUTF8();
+        long double doubleval = 0;   // see short case
+        s >> doubleval;
+        doubleFS.set((size_t) arrayPos, doubleval);
+        break;
+      }
+    default: {    //array of FSs
+        lowlevel::TyFS fsid = atoi(UnicodeStringRef(buffer).asUTF8().c_str());
+        FeatureStructure fsitem(fsid, *iv_cas);
+        ArrayFS fsArrayfs(fs);
+        fsArrayfs.set((size_t) arrayPos, fsitem);
+        break;
+      }
+    }
+
+    ++arrayPos;
+  }
+
+
+
+  /**
+   * Create a feature value from a string representation.
+   * Convenience overload: looks up the type of the FS stored at
+   * <code>addr</code> and forwards to the type-based overload.
+   */
+  void XCASDeserializerHandler::handleFeature(lowlevel::TyFS addr, UnicodeString & featName, UnicodeString & featVal, bool lenient) {
+    lowlevel::TyFSType typeCode = iv_casimpl.getHeap().getType(addr);
+    Type fsType = uima::internal::FSPromoter::promoteType(typeCode, iv_cas->getTypeSystem().getLowlevelTypeSystem());
+    handleFeature(fsType, addr, featName, featVal, lenient);
+  }
+
+  // Raises the "ill-formed input" exception used throughout this handler,
+  // with <code>detail</code> as the message parameter.
+  static void throwCorruptedFeatureInput(const char * detail) {
+    ErrorInfo errInfo;
+    errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+    ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
+    msg.addParam(detail);
+    errInfo.setMessage(msg);
+    errInfo.setSeverity(ErrorInfo::unrecoverable);
+    ExcIllFormedInputError exc(errInfo);
+    throw exc;
+  }
+
+  /**
+   * Sets feature <code>featName</code> of the FS at <code>addr</code> from its
+   * string representation <code>featVal</code>.
+   *
+   * A reference prefix on the feature name is stripped. Features the type does
+   * not define are skipped silently. Empty values are not set. For features
+   * whose range is neither a built-in primitive nor a string subtype, the
+   * parsed number is written to the heap as is.
+   *
+   * Note that <code>featName</code> and <code>featVal</code> may be modified.
+   *
+   * @param type     type of the FS at addr
+   * @param addr     heap address of the FS
+   * @param featName feature base name, optionally with the reference prefix
+   * @param featVal  feature value as text
+   * @param lenient  currently not consulted
+   * @throws ExcIllFormedInputError if a v1 sofa number or a sofaNum to be
+   *         remapped lies outside the mapping tables
+   */
+  void XCASDeserializerHandler::handleFeature(Type & type, lowlevel::TyFS addr, UnicodeString & featName, UnicodeString & featVal,
+      bool lenient) {
+
+    // handle v1.x format annotations, mapping int to ref values
+    lowlevel::TyFSType fstype = iv_casimpl.getHeap().getType(addr);
+    if (0==featName.compare("sofa") &&
+        iv_typesystem->subsumes(internal::gs_tyAnnotationBaseType, fstype)) {
+      int ifeatval = atoi(UnicodeStringRef(featVal).asUTF8().c_str());
+      // the value comes straight from the input; never index the map blindly
+      if (ifeatval < 0 || (size_t) ifeatval >= sofaRefMap.size()) {
+        throwCorruptedFeatureInput("Invalid sofa reference in the CAS");
+      }
+      // format through a stream: no fixed-size character buffer to overrun
+      stringstream mappedRef;
+      mappedRef << sofaRefMap[ifeatval];
+      featVal.setTo(UnicodeString(mappedRef.str().c_str()));
+    }
+
+    // handle v1.x sofanum values, remapping so that _InitialView always == 1
+    if (0==featName.compare(CAS::FEATURE_BASE_NAME_SOFAID)
+        && sofaTypeCode == fstype) {
+      int sofaNum = iv_casimpl.getHeap().getIntValue(addr, internal::gs_tySofaNumFeature);
+      if (sofaNum < 0 || (size_t) sofaNum >= indexMap.size()) {
+        throwCorruptedFeatureInput("Invalid sofaNum in the CAS");
+      }
+      iv_casimpl.getHeap().setIntValue(addr, internal::gs_tySofaNumFeature, indexMap[sofaNum]);
+    }
+
+    UnicodeString prefix(REF_PREFIX);
+    if (featName.startsWith(REF_PREFIX)) {
+      featName.remove(0,prefix.length());             // Delete prefix
+    }
+    FeatureStructure fs = uima::internal::FSPromoter::promoteFS(addr, *iv_cas);
+    Feature feat = type.getFeatureByBaseName(featName);
+    if (!feat.isValid()) {
+      // Feature does not exist in the typesystem. Out of typesystem data is
+      // not supported, so this feature is skipped.
+      return;
+    }
+
+    Type rtype;
+    feat.getRangeType(rtype);
+    lowlevel::TyFSType rangeType = uima::internal::FSPromoter::demoteType(rtype);
+
+    // Reference-like ranges that are not string subtypes: store the parsed id.
+    // (Handled in the default branch below; strings of a subtype are set as text.)
+    switch (rangeType) {
+    case internal::gs_tyIntegerType: {
+        if (featVal.length()>0) {
+          fs.setIntValue(feat, atoi(UnicodeStringRef(featVal).asUTF8().c_str()));
+        }
+        break;
+      }
+    case internal::gs_tyFloatType: {
+        if ( featVal.length() > 0)  {
+          fs.setFloatValue(feat, atof(UnicodeStringRef(featVal).asUTF8().c_str()));
+        }
+        break;
+      }
+    case internal::gs_tyStringType: {
+        if (featVal.length() > 0) {
+          fs.setStringValue(feat, featVal);
+        }
+        break;
+      }
+    case internal::gs_tyByteType: {
+        if (featVal.length() > 0) {
+          // the byte is serialized as its numeric value
+          string val = UnicodeStringRef(featVal).asUTF8();
+          short intval = atoi(val.c_str());
+          fs.setByteValue(feat, static_cast<char>(intval) );
+        }
+        break;
+      }
+    case internal::gs_tyBooleanType: {
+        if (featVal.length() > 0) {
+          // only the text "1" counts as true
+          string val = UnicodeStringRef(featVal).asUTF8();
+          fs.setBooleanValue(feat, val.compare("1")==0);
+        }
+        break;
+      }
+    case internal::gs_tyShortType: {
+        if (featVal.length() > 0) {
+          string strval = UnicodeStringRef(featVal).asUTF8();
+          // zero-initialized so that unparsable text stores 0 instead of an
+          // indeterminate value
+          short shortval = 0;
+          stringstream s;
+          s << strval.c_str();
+          s >> shortval;
+          fs.setShortValue(feat, shortval);
+        }
+        break;
+      }
+    case internal::gs_tyLongType: {
+        if (featVal.length() > 0) {
+          string strval = UnicodeStringRef(featVal).asUTF8();
+          INT64 longval = 0;   // see short case
+          stringstream s;
+          s << strval.c_str();
+          s >> longval;
+          fs.setLongValue(feat, longval);
+        }
+        break;
+      }
+    case internal::gs_tyDoubleType: {
+        if (featVal.length() > 0) {
+          string strval = UnicodeStringRef(featVal).asUTF8();
+          long double doubleval = 0;   // see short case
+          stringstream s;
+          s << strval.c_str();
+          s >> doubleval;
+          fs.setDoubleValue(feat, doubleval );
+        }
+        break;
+      }
+    default: {
+        if (rtype.isStringSubType()) {
+          if (featVal.length() > 0)
+            fs.setStringValue(feat, featVal);
+        } else if (featVal.length() > 0) {
+          lowlevel::TyFS val = (lowlevel::TyFS) atoi(UnicodeStringRef(featVal).asUTF8().c_str());
+          iv_casimpl.getHeap().setFeatureInternal(addr, uima::internal::FSPromoter::demoteFeature(feat), val);
+        }
+        break;
+      }
+    }
+  }
+
+  /**
+   * Second pass over one recorded feature structure, run from endDocument().
+   *
+   * First the FS is added to every index repository listed in its FSInfo
+   * (the listed numbers are translated through indexMap unless indexMap only
+   * holds its initial entry). Arrays are then passed on to finalizeArray().
+   * For all other FSs, each feature with a non-primitive range currently holds
+   * an id; where fsTree has a record for that id, the feature is rewritten to
+   * that record's heap address.
+   *
+   * @param fsInfo  record of the FS to finalize
+   * @throws ExcIllFormedInputError if a listed index repository number does
+   *         not correspond to a known repository
+   */
+  void XCASDeserializerHandler::finalizeFS(FSInfo & fsInfo) {
+    lowlevel::TyFS addr = fsInfo.addr;
+    FeatureStructure fs = uima::internal::FSPromoter::promoteFS(addr, *iv_cas);
+    Type type = fs.getType();
+
+    // Now add FS to all specified index repositories
+    for (size_t i = 0; i < fsInfo.indexRep->size(); i++) {
+      int repositoryNum = fsInfo.indexRep->at(i);
+      bool valid = true;
+      if (indexMap.size() != 1) {
+        // translate the serialized number; it comes straight from the input
+        valid = repositoryNum >= 0 && (size_t) repositoryNum < indexMap.size();
+        if (valid) {
+          repositoryNum = indexMap[repositoryNum];
+        }
+      }
+      // the mapping may contain -1 for numbers that must not occur
+      valid = valid && repositoryNum >= 0 && (size_t) repositoryNum < indexRepositories.size();
+      if (!valid) {
+        ErrorInfo errInfo;
+        errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+        ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
+        msg.addParam("Invalid index repository reference in the CAS" );
+        errInfo.setMessage(msg);
+        errInfo.setSeverity(ErrorInfo::unrecoverable);
+        ExcIllFormedInputError exc(errInfo);
+        throw exc;
+      }
+      lowlevel::IndexRepository *  pIndexRep = indexRepositories[repositoryNum];
+      assert(EXISTS(pIndexRep));
+      pIndexRep->add(addr);
+    }
+
+
+    if (iv_cas->getTypeSystem().isArrayType(uima::internal::FSPromoter::demoteType(type)) ) {
+      finalizeArray(type, addr, fsInfo);
+      return;
+    }
+
+
+    //update heap value of features that are references to other FS.
+    vector<Feature> feats;
+    type.getAppropriateFeatures(feats);
+
+    FSInfo * fsValInfo;
+    for (size_t i = 0; i < feats.size(); i++) {
+      Feature feat = (Feature) feats[i];
+      Type rangeType;
+      feat.getRangeType(rangeType);
+
+      if (rangeType.isValid()) {
+        lowlevel::TyFSType  rangetypecode = uima::internal::FSPromoter::demoteType(rangeType);
+        lowlevel::TyFSFeature featcode = uima::internal::FSPromoter::demoteFeature(feat);
+
+        //if not primitive
+        if (!iv_cas->getTypeSystem().isPrimitive(rangetypecode)) {
+          //get the current feature value which is the id
+          lowlevel::TyFS featVal = iv_casimpl.getHeap().getFeatureInternal(addr, featcode);
+          //get the FSInfo object for that id
+          fsValInfo = (FSInfo*) fsTree[featVal];
+          //if there is a FSInfo, set the feature value to the address in it;
+          //otherwise the value is left untouched
+          if (fsValInfo != NULL) {
+            iv_casimpl.getHeap().setFSValue(addr, featcode, fsValInfo->addr);
+          }
+        }
+      }
+    }
+  }
+
+
+  /**
+   * Fixes up the elements of an FS array after deserialization.
+   *
+   * Does nothing unless the type is an FS array type. Otherwise each element
+   * cell is read, used as key into fsTree, and overwritten with the address of
+   * the FSInfo found there; cells without a matching FSInfo are left unchanged.
+   *
+   * @param type    type of the array FS
+   * @param addr    heap address of the array FS
+   * @param fsInfo  not used by this function
+   */
+  void XCASDeserializerHandler::finalizeArray(Type & type, lowlevel::TyFS addr, FSInfo & fsInfo) {
+
+    if (!iv_cas->getTypeSystem().isFSArrayType(uima::internal::FSPromoter::demoteType(type))) {
+      return;
+    }
+
+    // *** WARNING ***  *** WARNING ***  *** WARNING ***  *** WARNING ***
+    // if implementation of ArrayFS on the heap changes, this code will be invalid:
+    // the element count is read from addr + 1, the elements from addr + 2 onwards
+    int numElements = (int)iv_cas->getHeap()->getHeap().getHeapValue(addr + 1);
+    for (int pos = 0; pos < numElements; pos++) {
+      lowlevel::TyFS elementId = iv_cas->getHeap()->getHeap().getHeapValue(addr + 2 + pos);
+      FSInfo * pTarget = fsTree[elementId];
+      if (pTarget == NULL) {
+        continue;
+      }
+      iv_cas->getHeap()->getHeap().setHeapValue(addr + 2 + pos, pTarget->addr);
+    }
+
+  }
+
+
+// ---------------------------------------------------------------------------
+//  XCASDeserializerHandler: Overrides of the SAX ErrorHandler interface
+// ---------------------------------------------------------------------------
+  void XCASDeserializerHandler::error(const SAXParseException& e) {
+    ErrorInfo errInfo;
+    errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+    ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_ERROR);
+    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+    msg.addParam((UChar const *)e.getSystemId());
+    msg.addParam(e.getLineNumber());
+    msg.addParam(e.getColumnNumber());
+    msg.addParam((UChar const *) e.getMessage());
+    errInfo.setMessage(msg);
+    errInfo.setSeverity(ErrorInfo::unrecoverable);
+    ExcIllFormedInputError exc(errInfo);
+    throw exc;
+  }
+
+  // SAX fatalError() callback: always throws ExcIllFormedInputError.
+  // The message carries system id, line, column and text of the SAX exception;
+  // the error info uses id UIMA_ERR_RESOURCE_CORRUPTED and severity unrecoverable.
+  void XCASDeserializerHandler::fatalError(const SAXParseException& e) {
+    // the casts below reinterpret XMLCh strings as UChar strings
+    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+
+    ErrorMessage parseMsg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
+    parseMsg.addParam((UChar const *)e.getSystemId());
+    parseMsg.addParam(e.getLineNumber());
+    parseMsg.addParam(e.getColumnNumber());
+    parseMsg.addParam((UChar const *) e.getMessage());
+
+    ErrorInfo info;
+    info.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+    info.setMessage(parseMsg);
+    info.setSeverity(ErrorInfo::unrecoverable);
+
+    throw ExcIllFormedInputError(info);
+  }
+
+  // SAX warning() callback: always throws ExcIllFormedInputError.
+  // Note that even warnings are raised with severity unrecoverable. The
+  // message carries system id, line, column and text of the SAX exception.
+  void XCASDeserializerHandler::warning(const SAXParseException& e) {
+    // the casts below reinterpret XMLCh strings as UChar strings
+    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+
+    ErrorMessage parseMsg(UIMA_MSG_ID_EXC_XML_SAXPARSE_WARNING);
+    parseMsg.addParam((UChar const *)e.getSystemId());
+    parseMsg.addParam(e.getLineNumber());
+    parseMsg.addParam(e.getColumnNumber());
+    parseMsg.addParam((UChar const *) e.getMessage());
+
+    ErrorInfo info;
+    info.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+    info.setMessage(parseMsg);
+    info.setSeverity(ErrorInfo::unrecoverable);
+
+    throw ExcIllFormedInputError(info);
+  }
+
+  // Definitions of the static string constants of XCASDeserializerHandler.
+  // NOTE(review): judging by their names these are the element/attribute names
+  // and default type/feature names recognized in XCAS documents; their uses
+  // lie outside this part of the file - confirm against the handler code.
+  char const * XCASDeserializerHandler::CASTAGNAME = "CAS";
+  char const * XCASDeserializerHandler::DEFAULT_DOC_TYPE_NAME = "uima.tcas.Document";
+  char const * XCASDeserializerHandler::DEFAULT_DOC_TEXT_FEAT = "text";
+  char const * XCASDeserializerHandler::INDEXED_ATTR_NAME = "_indexed";
+  char const * XCASDeserializerHandler::REF_PREFIX = "_ref_";
+  char const * XCASDeserializerHandler::ID_ATTR_NAME = "_id";
+  char const * XCASDeserializerHandler::CONTENT_ATTR_NAME = "_content";
+  char const * XCASDeserializerHandler::ARRAY_SIZE_ATTR = "size";
+  char const * XCASDeserializerHandler::ARRAY_ELEMENT_TAG = "i";
+  char const * XCASDeserializerHandler::TRUE_VALUE = "true";
+  char const * XCASDeserializerHandler::DEFAULT_CONTENT_FEATURE = "value";
+
+
+
+} // namespace uima
+
+

Propchange: incubator/uima/uimacpp/trunk/src/cas/xcasdeserializer_handler.cpp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/uima/uimacpp/trunk/src/cas/xmlerror_handler.cpp
URL: http://svn.apache.org/viewvc/incubator/uima/uimacpp/trunk/src/cas/xmlerror_handler.cpp?view=auto&rev=503248
==============================================================================
--- incubator/uima/uimacpp/trunk/src/cas/xmlerror_handler.cpp (added)
+++ incubator/uima/uimacpp/trunk/src/cas/xmlerror_handler.cpp Sat Feb  3 08:54:09 2007
@@ -0,0 +1,108 @@
+/** \file xmlerror_handler.cpp .
+-----------------------------------------------------------------------------
+
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+
+-----------------------------------------------------------------------------
+
+   Description: Handler for XML error interface mapping XML to UIMACPP exceptions
+
+-----------------------------------------------------------------------------
+
+
+   09/23/2002  Initial creation
+
+-------------------------------------------------------------------------- */
+
+// ---------------------------------------------------------------------------
+//  Includes
+// ---------------------------------------------------------------------------
+#include "uima/pragmas.hpp"
+#include <iostream>
+#include <algorithm>
+using namespace std;
+
+#include "xercesc/sax/AttributeList.hpp"
+#include "xercesc/sax/SAXParseException.hpp"
+#include "xercesc/sax/SAXException.hpp"
+#include "uima/xmlerror_handler.hpp"
+#include "uima/msg.h"
+#include "uima/exceptions.hpp"
+
+namespace uima {
+
+// ---------------------------------------------------------------------------
+//  XMLErrorHandler: Constructors and Destructor
+// ---------------------------------------------------------------------------
+  // Default constructor; the body is empty, there is nothing to set up here.
+  XMLErrorHandler::XMLErrorHandler() {}
+
+
+  // Destructor; the body is empty, there is nothing to release here.
+  XMLErrorHandler::~XMLErrorHandler()   {}
+
+
+
+// ---------------------------------------------------------------------------
+//  XMLErrorHandler: Overrides of the SAX ErrorHandler interface
+// ---------------------------------------------------------------------------
+  void XMLErrorHandler::error(const SAXParseException& e) {
+    ErrorInfo errInfo;
+    errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+    ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_ERROR);
+    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+    msg.addParam( (UChar const *) e.getSystemId());
+    msg.addParam(e.getLineNumber());
+    msg.addParam(e.getColumnNumber());
+    msg.addParam( (UChar const *) e.getMessage());
+    errInfo.setMessage(msg);
+    errInfo.setSeverity(ErrorInfo::unrecoverable);
+    ExcIllFormedInputError exc(errInfo);
+    throw exc;
+  }
+
+  // SAX fatalError() callback: always throws ExcIllFormedInputError.
+  // The message carries system id, line, column and text of the SAX exception;
+  // the error info uses id UIMA_ERR_RESOURCE_CORRUPTED and severity unrecoverable.
+  void XMLErrorHandler::fatalError(const SAXParseException& e) {
+    // the casts below reinterpret XMLCh strings as UChar strings
+    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+
+    ErrorMessage parseMsg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
+    parseMsg.addParam( (UChar const *) e.getSystemId());
+    parseMsg.addParam(e.getLineNumber());
+    parseMsg.addParam(e.getColumnNumber());
+    parseMsg.addParam( (UChar const *) e.getMessage());
+
+    ErrorInfo info;
+    info.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+    info.setMessage(parseMsg);
+    info.setSeverity(ErrorInfo::unrecoverable);
+
+    throw ExcIllFormedInputError(info);
+  }
+
+  // SAX warning() callback: always throws ExcIllFormedInputError.
+  // Note that even warnings are raised with severity unrecoverable. The
+  // message carries system id, line, column and text of the SAX exception.
+  void XMLErrorHandler::warning(const SAXParseException& e) {
+    // the casts below reinterpret XMLCh strings as UChar strings
+    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+
+    ErrorMessage parseMsg(UIMA_MSG_ID_EXC_XML_SAXPARSE_WARNING);
+    parseMsg.addParam( (UChar const *) e.getSystemId());
+    parseMsg.addParam(e.getLineNumber());
+    parseMsg.addParam(e.getColumnNumber());
+    parseMsg.addParam( (UChar const *) e.getMessage());
+
+    ErrorInfo info;
+    info.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
+    info.setMessage(parseMsg);
+    info.setSeverity(ErrorInfo::unrecoverable);
+
+    throw ExcIllFormedInputError(info);
+  }
+
+} // namespace uima
+

Propchange: incubator/uima/uimacpp/trunk/src/cas/xmlerror_handler.cpp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/uima/uimacpp/trunk/src/cas/xmltypesystemreader.cpp
URL: http://svn.apache.org/viewvc/incubator/uima/uimacpp/trunk/src/cas/xmltypesystemreader.cpp?view=auto&rev=503248
==============================================================================
--- incubator/uima/uimacpp/trunk/src/cas/xmltypesystemreader.cpp (added)
+++ incubator/uima/uimacpp/trunk/src/cas/xmltypesystemreader.cpp Sat Feb  3 08:54:09 2007
@@ -0,0 +1,385 @@
+/** \file xmltypesystemreader.cpp .
+-----------------------------------------------------------------------------
+
+
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+
+-----------------------------------------------------------------------------
+
+   Description:
+
+-----------------------------------------------------------------------------
+
+
+-------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------- */
+/*       Include dependencies                                              */
+/* ----------------------------------------------------------------------- */
+// #define DEBUG_VERBOSE
+
+#include "uima/pragmas.hpp"
+
+#include "xercesc/util/PlatformUtils.hpp"
+#include "xercesc/sax/SAXParseException.hpp"
+#include "xercesc/parsers/XercesDOMParser.hpp"
+#include "xercesc/dom/DOMException.hpp"
+#include "xercesc/dom/DOMNamedNodeMap.hpp"
+
+#include "xercesc/sax/ErrorHandler.hpp"
+#include "xercesc/dom/DOMDocument.hpp"
+#include "xercesc/dom/DOMElement.hpp"
+#include "xercesc/dom/DOMNodeList.hpp"
+#include "xercesc/framework/LocalFileInputSource.hpp"
+#include "xercesc/framework/MemBufInputSource.hpp"
+
+#include "uima/xmltypesystemreader.hpp"
+#include "uima/lowlevel_typesystem.hpp"
+
+#include "uima/internal_xmlconstants.hpp"
+#include "uima/internal_casimpl.hpp"
+#include "uima/msg.h"
+#include "uima/xmlerror_handler.hpp"
+#include "uima/macros.h"
+#include "uima/casdefinition.hpp"
+
+/* ----------------------------------------------------------------------- */
+/*       Constants                                                         */
+/* ----------------------------------------------------------------------- */
+#define MAXXMLCHBUFF 256
+/* ----------------------------------------------------------------------- */
+/*       Forward declarations                                              */
+/* ----------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------- */
+/*       Types / Classes                                                   */
+/* ----------------------------------------------------------------------- */
+
+#define DEBUG_VERBOSE
+
+namespace uima {
+
+  // Implementation part of the exception class thrown by checkValidityCondition().
+  // The macro arguments name the exception class and uima::Exception.
+  // NOTE(review): the macro is defined outside this file; presumably the second
+  // argument is the base class - confirm against its definition.
+  UIMA_EXC_CLASSIMPLEMENT(XMLTypeSystemReaderException, uima::Exception);
+
+  /**
+   * Error handler that does no translation at all: every SAXParseException
+   * reported through error(), fatalError() or warning() is thrown again as is.
+   * resetErrors() does nothing.
+   */
+  class RethrowErrorHandler : public ErrorHandler {
+    // single place where the reported exception is thrown (as a copy)
+    static void rethrow(const SAXParseException& reported) {
+      throw reported;
+    }
+
+  public:
+
+    void error(const SAXParseException& e) {
+      rethrow(e);
+    }
+
+    void fatalError(const SAXParseException& e) {
+      rethrow(e);
+    }
+
+    void warning(const SAXParseException& e) {
+      rethrow(e);
+    }
+
+    void resetErrors() {}
+  };
+
+
+  // Scratch buffer filled by convert(char const *); its contents are replaced
+  // on every call.
+  static XMLCh gs_tempXMLChBuffer[ MAXXMLCHBUFF ];
+
+  /**
+   * Transcodes a C string into the file-static XMLCh buffer and returns a
+   * pointer to that buffer.
+   *
+   * The returned pointer refers to shared static storage: the next call
+   * overwrites the result, so it must be consumed before convert() is called
+   * again. Transcoding success is only checked by an assert.
+   *
+   * @param cpBuf  zero-terminated string to transcode
+   * @return pointer to gs_tempXMLChBuffer
+   */
+  XMLCh const * convert(char const * cpBuf) {
+    bool bTranscoded = XMLString::transcode( cpBuf, gs_tempXMLChBuffer, MAXXMLCHBUFF -1 );
+    assert( bTranscoded );
+    return gs_tempXMLChBuffer;
+  }
+
+  /**
+   * Builds a UnicodeString from a zero-terminated XMLCh string.
+   *
+   * The XMLCh data is reinterpreted as UChar data, which is why both types
+   * must have the same size.
+   *
+   * @param cpUCBuf  zero-terminated XMLCh string
+   * @return UnicodeString constructed from the characters of cpUCBuf
+   */
+  UnicodeString convert( XMLCh const * cpUCBuf ) {
+    assertWithMsg( sizeof(XMLCh) == sizeof(UChar), "Port required!");
+    UChar const * cpChars = (UChar const *) cpUCBuf;
+    unsigned int uiNumChars = XMLString::stringLen( cpUCBuf );
+    return UnicodeString( cpChars, uiNumChars);
+  }
+
+
+  /**
+   * Throws an XMLTypeSystemReaderException (unrecoverable) unless the
+   * condition holds.
+   *
+   * @param bCondition  condition that must be true for reading to continue
+   */
+  void XMLTypeSystemReader::checkValidityCondition(bool bCondition) const {
+    if (bCondition) {
+      return;
+    }
+    UIMA_EXC_THROW_NEW(XMLTypeSystemReaderException,
+                       UIMA_ERR_XMLTYPESYSTEMREADER,
+                       UIMA_MSG_ID_EXC_XMLTYPESYSTEMREADER,
+                       uima::ErrorMessage(UIMA_MSG_ID_EXCON_READING_TYPESYSTEM_FROM_XML),
+                       uima::ErrorInfo::unrecoverable
+                      );
+  }
+
+  /**
+   * Throws an XMLTypeSystemReaderException (unrecoverable) unless the
+   * condition holds; the context message is built from tyMessage with one
+   * string parameter.
+   *
+   * @param bCondition  condition that must be true for reading to continue
+   * @param tyMessage   id of the context message
+   * @param crString    parameter added to the context message
+   */
+  void XMLTypeSystemReader::checkValidityCondition(bool bCondition, TyMessageId tyMessage, icu::UnicodeString const & crString) const {
+    if (bCondition) {
+      return;
+    }
+    uima::ErrorMessage violation(tyMessage);
+    violation.addParam( crString );
+    UIMA_EXC_THROW_NEW(XMLTypeSystemReaderException,
+                       UIMA_ERR_XMLTYPESYSTEMREADER,
+                       UIMA_MSG_ID_EXC_XMLTYPESYSTEMREADER,
+                       violation,
+                       uima::ErrorInfo::unrecoverable
+                      );
+  }
+
+
+  /**
+   * Throws an XMLTypeSystemReaderException (unrecoverable) unless the
+   * condition holds; the context message is built from tyMessage with two
+   * string parameters, added in the order given.
+   *
+   * @param bCondition  condition that must be true for reading to continue
+   * @param tyMessage   id of the context message
+   * @param crString1   first parameter added to the context message
+   * @param crString2   second parameter added to the context message
+   */
+  void XMLTypeSystemReader::checkValidityCondition(bool bCondition, TyMessageId tyMessage, icu::UnicodeString const & crString1, icu::UnicodeString const & crString2) const {
+    if (bCondition) {
+      return;
+    }
+    uima::ErrorMessage violation(tyMessage);
+    violation.addParam( crString1 );
+    violation.addParam( crString2 );
+    UIMA_EXC_THROW_NEW(XMLTypeSystemReaderException,
+                       UIMA_ERR_XMLTYPESYSTEMREADER,
+                       UIMA_MSG_ID_EXC_XMLTYPESYSTEMREADER,
+                       violation,
+                       uima::ErrorInfo::unrecoverable
+                      );
+  }
+
+
+  // Creates a reader that fills the given type system (promoted to its
+  // lowlevel counterpart). No error handler is set initially.
+  XMLTypeSystemReader::XMLTypeSystemReader(TypeSystem & rTypeSystem)
+      : iv_rTypeSystem(uima::lowlevel::TypeSystem::promoteTypeSystem( rTypeSystem )),
+      iv_pXMLErrorHandler(NULL) {}
+
+  // Creates a reader that fills the type system of the given CAS definition.
+  // No error handler is set initially.
+  XMLTypeSystemReader::XMLTypeSystemReader(uima::internal::CASDefinition & casDef)
+      : iv_rTypeSystem( casDef.getTypeSystem() ),
+      iv_pXMLErrorHandler(NULL) {}
+
+  // Empty destructor: in particular, a handler registered through
+  // setErrorHandler() is not deleted here.
+  XMLTypeSystemReader::~XMLTypeSystemReader() {}
+
+  /**
+   * Creates (or verifies) every feature declared below the given type element.
+   *
+   * All feature elements returned by getElementsByTagName() are visited. For
+   * each one the feature name and range type name are read from its
+   * attributes, and the introducing type name from the name attribute of its
+   * parent element, which must be a type element. A feature that already
+   * exists on the intro type must agree in intro type and range type;
+   * otherwise the feature is created with iv_ustrCreatorID as creator.
+   * Violations are reported through checkValidityCondition().
+   *
+   * Note: the pointer returned by convert(char const *) is always consumed
+   * within the same expression, because every call reuses one static buffer.
+   *
+   * @param pTopTypeElement  element whose feature elements are processed
+   */
+  void XMLTypeSystemReader::createFeatures(DOMElement * pTopTypeElement) {
+    DOMNodeList * pFeatureNodes = pTopTypeElement->getElementsByTagName( convert(uima::internal::XMLConstants::TAGNAME_FEATURE) );
+    for (unsigned int idx = 0; idx < pFeatureNodes->getLength(); ++idx) {
+      DOMNode * pFeatureNode = pFeatureNodes->item(idx);
+      assert( pFeatureNode->getNodeType() == DOMNode::ELEMENT_NODE );
+      assert( XMLString::compareString( pFeatureNode->getNodeName(), convert(uima::internal::XMLConstants::TAGNAME_FEATURE)) == 0 );
+      DOMElement * pFeatureElement = static_cast<DOMElement*>(pFeatureNode);
+
+      // range type first, then feature name (same order as the attribute reads require)
+      icu::UnicodeString rangeTypeName = convert( pFeatureElement->getAttribute( convert(uima::internal::XMLConstants::ATTRIBUTENAME_RANGE) ) );
+      icu::UnicodeString featureName = convert( pFeatureElement->getAttribute( convert(uima::internal::XMLConstants::ATTRIBUTENAME_NAME)) );
+      UIMA_TPRINT("Checking for feature  : " << featureName << " with range type " << rangeTypeName);
+
+      // the enclosing element names the type that introduces the feature
+      DOMNode * pParentNode = pFeatureNode->getParentNode();
+      assert( pParentNode->getNodeType() == DOMNode::ELEMENT_NODE );
+
+      checkValidityCondition( XMLString::compareString( pParentNode->getNodeName(), convert(uima::internal::XMLConstants::TAGNAME_TYPE) ) == 0,
+                              UIMA_MSG_ID_EXC_WRONG_XML_TYPESYSTEM_FORMAT,
+                              featureName
+                            );
+
+      DOMElement * pParentElement = static_cast<DOMElement *>(pParentNode);
+      icu::UnicodeString introTypeName = convert( pParentElement->getAttribute(convert(uima::internal::XMLConstants::ATTRIBUTENAME_NAME)) );
+      UIMA_TPRINT("Checking for feature  : " << featureName << " with range type " << rangeTypeName << " at intro type " << introTypeName);
+
+      // both types must already be known to the type system
+      lowlevel::TyFSType introType = iv_rTypeSystem.getTypeByName(introTypeName);
+      checkValidityCondition( iv_rTypeSystem.isValidType(introType),
+                              UIMA_MSG_ID_EXC_INVALID_INTRO_TYPE,
+                              introTypeName );
+      lowlevel::TyFSType rangeType = iv_rTypeSystem.getTypeByName(rangeTypeName);
+      checkValidityCondition( iv_rTypeSystem.isValidType(rangeType),
+                              UIMA_MSG_ID_EXC_INVALID_RANGE_TYPE,
+                              rangeTypeName );
+
+      lowlevel::TyFSFeature existingFeature = iv_rTypeSystem.getFeatureByBaseName(introType, featureName );
+      if (existingFeature == lowlevel::TypeSystem::INVALID_FEATURE) {
+        UIMA_TPRINT("Creating feature  : " << featureName << " with range type " << rangeTypeName << " at intro type " << introTypeName);
+        // create the feature
+        iv_rTypeSystem.createFeature( introType, rangeType, featureName, iv_ustrCreatorID );
+      } else {
+        // check that intro and range types are correct
+        checkValidityCondition( introType == iv_rTypeSystem.getIntroType(existingFeature),
+                                UIMA_MSG_ID_EXC_INVALID_INTRO_TYPE,
+                                featureName,
+                                introTypeName );
+        checkValidityCondition( rangeType == iv_rTypeSystem.getRangeType(existingFeature),
+                                UIMA_MSG_ID_EXC_INVALID_RANGE_TYPE,
+                                featureName,
+                                rangeTypeName );
+      }
+    }
+  }
+
+
+  /**
+   * Creates (or verifies) the type described by a type element and recurses
+   * into its child type elements.
+   *
+   * If tyParentType is INVALID_TYPE the element stands for the top type of the
+   * type system. Otherwise the type named by the element is looked up; it is
+   * created below tyParentType if missing, and if present its parent must be
+   * tyParentType. Every direct child that is a type element is then processed
+   * with the resulting type as parent.
+   *
+   * @param tyParentType     parent of the type to create, or INVALID_TYPE
+   * @param pNewTypeElement  type element describing the type
+   */
+  void XMLTypeSystemReader::createType(lowlevel::TyFSType tyParentType, DOMElement * pNewTypeElement) {
+    UIMA_TPRINT("entering createType");
+    assert( XMLString::compareString( pNewTypeElement->getNodeName(), convert(uima::internal::XMLConstants::TAGNAME_TYPE)) == 0 );
+
+    lowlevel::TyFSType tyResolved = lowlevel::TypeSystem::INVALID_TYPE;
+    if (tyParentType == lowlevel::TypeSystem::INVALID_TYPE) {
+      // no parent given: this element is the top type
+      tyResolved = iv_rTypeSystem.getTopType();
+    } else {
+      assert( iv_rTypeSystem.isValidType( tyParentType ) );
+      icu::UnicodeString typeName = convert( pNewTypeElement->getAttribute( convert(uima::internal::XMLConstants::ATTRIBUTENAME_NAME )) );
+
+      tyResolved = iv_rTypeSystem.getTypeByName( typeName );
+      UIMA_TPRINT("Checking for type : " << typeName);
+      if (tyResolved != lowlevel::TypeSystem::INVALID_TYPE) {
+        // type already known: it must hang below the expected parent
+        checkValidityCondition( iv_rTypeSystem.getParentType(tyResolved) == tyParentType,
+                                UIMA_MSG_ID_EXC_WRONG_PARENT_TYPE,
+                                iv_rTypeSystem.getTypeName(tyResolved) );
+      } else {
+        UIMA_TPRINT("Creating type : " << typeName);
+        tyResolved = iv_rTypeSystem.createType(tyParentType, typeName, iv_ustrCreatorID);
+      }
+    }
+    assert( iv_rTypeSystem.isValidType( tyResolved ) );
+
+
+    // descend into all direct children that are type elements
+    DOMNodeList * pChildNodes = pNewTypeElement->getChildNodes();
+    for (unsigned int idx = 0; idx < pChildNodes->getLength(); ++idx) {
+      DOMNode * pChild = pChildNodes->item(idx);
+      bool bElementNode = ( pChild->getNodeType() == DOMNode::ELEMENT_NODE );
+      bool bTypeTag = ( XMLString::compareString(pChild->getNodeName(), convert(uima::internal::XMLConstants::TAGNAME_TYPE)) == 0 );
+      if (!bTypeTag || !bElementNode) {
+        continue;
+      }
+      createType(tyResolved, static_cast<DOMElement*>(pChild));
+    }
+
+
+    UIMA_TPRINT("exiting createType");
+  }
+
+
+  /**
+   * Reads a type system from XML held in a UnicodeString.
+   *
+   * The string is converted to UTF-8 and passed on to the char const *
+   * overload of readMemory().
+   *
+   * @param xmlString  XML text of the type system
+   * @param creatorID  creator id handed through to read()
+   */
+  void XMLTypeSystemReader::readMemory(icu::UnicodeString const & xmlString, icu::UnicodeString const & creatorID) {
+    UnicodeStringRef uref(xmlString);
+    // the temporary UTF-8 string lives until the call has returned
+    readMemory(uref.asUTF8().c_str(), creatorID);
+  }
+
+
+  /**
+   * Reads a type system from a zero-terminated XML string in memory.
+   *
+   * The bytes up to the terminating zero are wrapped in a MemBufInputSource
+   * (buffer id "sysID") and handed to read().
+   *
+   * @param cpszXMLString  zero-terminated XML text
+   * @param creatorID      creator id handed through to read()
+   */
+  void XMLTypeSystemReader::readMemory(char const * cpszXMLString, icu::UnicodeString const & creatorID) {
+    XMLByte const * cpBytes = (XMLByte const *) cpszXMLString;
+    MemBufInputSource xmlSource(cpBytes, strlen(cpszXMLString), "sysID");
+    read(xmlSource, creatorID );
+  }
+
+
+  /**
+   * Reads a type system from the file with the given name.
+   *
+   * @param fileName   file name as C string; it is turned into a UnicodeString
+   *                   using the default converter for the platform (W/1252 U/utf-8)
+   * @param creatorID  creator id handed through to the UnicodeString overload
+   */
+  void XMLTypeSystemReader::readFile(char const * fileName, icu::UnicodeString const & creatorID) {
+    icu::UnicodeString const unicodeName(fileName);
+    readFile( unicodeName, creatorID );
+  }
+
+  /**
+   * Reads a type system from the file with the given name.
+   *
+   * The name is copied into a zero-terminated UChar buffer, which is passed to
+   * LocalFileInputSource reinterpreted as XMLCh string. The buffer is released
+   * on every path, including when opening or reading the file throws.
+   *
+   * @param fileName   name of the XML file
+   * @param creatorID  creator id handed through to read()
+   */
+  void XMLTypeSystemReader::readFile(icu::UnicodeString const & fileName, icu::UnicodeString const & creatorID) {
+    size_t uiLen = fileName.length();
+    // operator new[] reports failure by throwing, so no NULL check is needed
+    UChar* arBuffer = new UChar[uiLen + 1];
+
+    fileName.extract(0, uiLen, arBuffer);
+    arBuffer[uiLen] = 0; // terminate the buffer with 0
+
+    try {
+      LocalFileInputSource fileIS((XMLCh const *) arBuffer );
+      read(fileIS, creatorID );
+    } catch (...) {
+      // read() reports problems via exceptions; do not leak the name buffer
+      delete[] arBuffer;
+      throw;
+    }
+
+    delete[] arBuffer;
+  }
+
+
+  // Stores the error handler that read() installs on the parser. Only the
+  // pointer is stored; passing NULL makes read() fall back to an
+  // XMLErrorHandler of its own.
+  void XMLTypeSystemReader::setErrorHandler(ErrorHandler * pErrorHandler) {
+    iv_pXMLErrorHandler = pErrorHandler;
+  }
+
+
+  /**
+   * Parses the XML type system description from the given input source and
+   * creates its types and features in the type system of this reader.
+   *
+   * The document element must be the type hierarchy element. Its first child
+   * element must be a type element; it is processed with createType() and
+   * createFeatures(), and any further children are ignored.
+   *
+   * Parser errors go to the handler registered with setErrorHandler(). If none
+   * is registered, a local XMLErrorHandler is used for the duration of the
+   * call; the registered handler pointer itself is never modified, so nothing
+   * is leaked or left behind when parsing or validation throws.
+   *
+   * @param crInputSource  source of the XML document
+   * @param creatorID      creator id recorded for created types and features
+   */
+  void XMLTypeSystemReader::read(InputSource const & crInputSource, icu::UnicodeString const & creatorID) {
+    UIMA_TPRINT("read() entered");
+    iv_ustrCreatorID = creatorID;
+
+    // Fallback handler; declared before the parser so that it outlives it.
+    XMLErrorHandler defaultErrorHandler;
+    ErrorHandler * pErrorHandler = iv_pXMLErrorHandler;
+    if (pErrorHandler == NULL) {
+      pErrorHandler = &defaultErrorHandler;
+    }
+
+    XercesDOMParser parser;
+    parser.setValidationScheme(XercesDOMParser::Val_Auto);
+    parser.setDoNamespaces(false);
+    parser.setDoSchema(false);
+    parser.setErrorHandler(pErrorHandler);
+
+    parser.parse( crInputSource);
+    DOMDocument* doc = parser.getDocument();
+    assert(EXISTS(doc));
+
+    // get top node
+    DOMElement * rootElem = doc->getDocumentElement();
+    assert(EXISTS(rootElem));
+
+    /* taph 02.10.2002: do we need to do the validity checking ourselves?
+       Adding an (inline) DTD does that better than we could ever do it.
+       And it is expensive because of the conversions. */
+    icu::UnicodeString ustrTAGNAME_TYPEHIERARCHY(uima::internal::XMLConstants::TAGNAME_TYPEHIERARCHY);
+    icu::UnicodeString ustrTAGNAME_TYPE(uima::internal::XMLConstants::TAGNAME_TYPE);
+    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+    icu::UnicodeString ustrRootName( (UChar const *) rootElem->getNodeName());
+    UIMA_TPRINT("root element name: "<< ustrRootName );
+    checkValidityCondition( ustrRootName == ustrTAGNAME_TYPEHIERARCHY,
+                            UIMA_MSG_ID_EXC_WRONG_XML_TYPESYSTEM_FORMAT,
+                            ustrRootName );
+
+    DOMNodeList * children = rootElem->getChildNodes();
+    assert(EXISTS(children));
+
+    checkValidityCondition( children->getLength() > 0 );
+    for (unsigned int i = 0; i < children->getLength(); ++i) {
+      DOMNode * kid = children->item(i);
+      assert(EXISTS(kid));
+      // skip everything that is not an element (e.g. text or comment nodes)
+      if ( kid->getNodeType() != DOMNode::ELEMENT_NODE ) {
+        continue;
+      }
+      UIMA_TPRINT("in element node block");
+
+      // kid should be the element of the top type
+      DOMElement * kidElem = (DOMElement*) kid;
+      /* taph 02.10.2002: do we need to do the validity checking ourselves?
+         Adding an (inline) DTD does that better than we could ever do it.
+         And it is expensive because of the conversions. */
+      assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
+      icu::UnicodeString ustrKidName((UChar const *) kidElem->getNodeName());
+      checkValidityCondition( ustrKidName == ustrTAGNAME_TYPE,
+                              UIMA_MSG_ID_EXC_WRONG_XML_TYPESYSTEM_FORMAT,
+                              ustrKidName );
+
+      createType(lowlevel::TypeSystem::INVALID_TYPE, kidElem );
+      createFeatures(kidElem);
+      // only the first element child is processed
+      break;
+    }
+
+    UIMA_TPRINT("Exiting read()");
+  }
+
+
+} // namespace uima
+
+/* ----------------------------------------------------------------------- */
+/*       Implementation                                                    */
+/* ----------------------------------------------------------------------- */
+
+
+/* ----------------------------------------------------------------------- */
+
+
+
+

Propchange: incubator/uima/uimacpp/trunk/src/cas/xmltypesystemreader.cpp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/uima/uimacpp/trunk/src/cas/xmltypesystemwriter.cpp
URL: http://svn.apache.org/viewvc/incubator/uima/uimacpp/trunk/src/cas/xmltypesystemwriter.cpp?view=auto&rev=503248
==============================================================================
--- incubator/uima/uimacpp/trunk/src/cas/xmltypesystemwriter.cpp (added)
+++ incubator/uima/uimacpp/trunk/src/cas/xmltypesystemwriter.cpp Sat Feb  3 08:54:09 2007
@@ -0,0 +1,117 @@
+/** \file xmltypesystemwriter.cpp .
+-----------------------------------------------------------------------------
+
+
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+
+-----------------------------------------------------------------------------
+
+   Description:
+
+-----------------------------------------------------------------------------
+
+
+-------------------------------------------------------------------------- */
+
+
+/* ----------------------------------------------------------------------- */
+/*       Include dependencies                                              */
+/* ----------------------------------------------------------------------- */
+#include "uima/pragmas.hpp"
+#include "uima/xmltypesystemwriter.hpp"
+#include "uima/internal_xmlconstants.hpp"
+#include "uima/internal_casimpl.hpp"
+/* ----------------------------------------------------------------------- */
+/*       Constants                                                         */
+/* ----------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------- */
+/*       Forward declarations                                              */
+/* ----------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------- */
+/*       Types / Classes                                                   */
+/* ----------------------------------------------------------------------- */
+
+using namespace std;
+
+namespace uima {
+
+  /**
+   * Creates a writer for the given CAS.
+   *
+   * @param crCAS  the CAS whose type system is serialized by write()
+   */
+  XMLTypeSystemWriter::XMLTypeSystemWriter(CAS const & crCAS)
+      : iv_crCAS(crCAS)
+  {
+    // nothing to do beyond initializing the member
+  }
+
+  /**
+   * Writes the XML element for <code>crType</code> to <code>os</code>:
+   * the opening type tag carrying the type's name, one empty feature
+   * element (name and range attributes) for every feature introduced at
+   * this type, the elements of all direct subtypes (written recursively),
+   * and finally the closing type tag.
+   *
+   * @param os      stream receiving the XML text
+   * @param crType  type at which the written subtree starts
+   */
+  void XMLTypeSystemWriter::writeType(ostream & os, uima::Type const & crType) const {
+    UnicodeStringRef nameOfType( crType.getName() );
+
+    // opening tag of the type element, including its name attribute
+    os << "<" << uima::internal::XMLConstants::TAGNAME_TYPE;
+    os << " " << uima::internal::XMLConstants::ATTRIBUTENAME_NAME << "=\"" << nameOfType << "\">" << endl;
+
+    vector<uima::Feature> allFeatures;
+    crType.getAppropriateFeatures(allFeatures);
+    for (size_t featIdx = 0; featIdx < allFeatures.size(); ++featIdx) {
+      uima::Feature & rFeature = allFeatures[featIdx];
+      assert( rFeature.isValid());
+
+      Type introducedAt;
+      rFeature.getIntroType(introducedAt);
+      assert( introducedAt.isValid() );
+      // skip every feature that is not introduced at this very type
+      if ( !(introducedAt == crType) ) {
+        continue;
+      }
+
+      Type rangeType;
+      rFeature.getRangeType(rangeType);
+      assert( rangeType.isValid());
+
+      // empty feature element with its name and range attributes
+      os << "<" << uima::internal::XMLConstants::TAGNAME_FEATURE;
+      os << " " << uima::internal::XMLConstants::ATTRIBUTENAME_NAME << "=\"" << rFeature.getName();
+      os << "\" " << uima::internal::XMLConstants::ATTRIBUTENAME_RANGE << "=\"" << rangeType.getName();
+      os << "\"/>" << endl;
+    }
+
+    // nested elements for the direct subtypes
+    vector<uima::Type> directSubTypes;
+    crType.getDirectSubTypes(directSubTypes);
+    for (size_t subIdx = 0; subIdx < directSubTypes.size(); ++subIdx) {
+      uima::Type & rSubType = directSubTypes[subIdx];
+      assert( rSubType.isValid() );
+      writeType(os, rSubType);
+    }
+
+    // closing tag of the type element
+    os << "</" << uima::internal::XMLConstants::TAGNAME_TYPE << ">" << endl;
+  }
+
+  /**
+   * Writes the type hierarchy of the CAS as an XML document to
+   * <code>os</code>: the XML declaration, an inline DTD, and the type
+   * hierarchy element containing the nested type elements, starting at the
+   * top type of the type system.
+   *
+   * @param os  stream receiving the XML text
+   */
+  void XMLTypeSystemWriter::write(std::ostream & os) const {
+    os << "<?xml version=\"1.0\"?>" << endl;
+
+    // Inline DTD. Every element and attribute name is taken from
+    // XMLConstants (including the child element of the hierarchy element,
+    // which used to be the hard-coded literal "type"), so the DTD always
+    // agrees with the tags that writeType() emits.
+    os << "<!DOCTYPE " << uima::internal::XMLConstants::TAGNAME_TYPEHIERARCHY << " [\n"
+    << "<!ELEMENT " << uima::internal::XMLConstants::TAGNAME_TYPEHIERARCHY
+    << " (" << uima::internal::XMLConstants::TAGNAME_TYPE << "*)>\n"
+    << "<!ELEMENT " << uima::internal::XMLConstants::TAGNAME_TYPE
+    << " (" << uima::internal::XMLConstants::TAGNAME_FEATURE
+    << "|" << uima::internal::XMLConstants::TAGNAME_TYPE << ")*>\n"
+    << "<!ATTLIST " << uima::internal::XMLConstants::TAGNAME_TYPE
+    << " " << uima::internal::XMLConstants::ATTRIBUTENAME_NAME << " CDATA #REQUIRED>\n"
+    << "<!ELEMENT " << uima::internal::XMLConstants::TAGNAME_FEATURE << " EMPTY>\n"
+    << "<!ATTLIST " << uima::internal::XMLConstants::TAGNAME_FEATURE
+    << " " << uima::internal::XMLConstants::ATTRIBUTENAME_NAME << " CDATA #REQUIRED "
+    << uima::internal::XMLConstants::ATTRIBUTENAME_RANGE << " CDATA #REQUIRED >\n"
+    << "]>" << endl;
+
+    // Document body: the hierarchy element wrapping the recursively written
+    // type tree rooted at the top type.
+    os << "<" << uima::internal::XMLConstants::TAGNAME_TYPEHIERARCHY << ">" << endl;
+    Type top = iv_crCAS.getTypeSystem().getTopType();
+    writeType(os, top);
+    os << "</" << uima::internal::XMLConstants::TAGNAME_TYPEHIERARCHY << ">" << endl;
+  }
+
+}
+
+/* ----------------------------------------------------------------------- */
+/*       Implementation                                                    */
+/* ----------------------------------------------------------------------- */
+
+
+/* ----------------------------------------------------------------------- */
+
+
+

Propchange: incubator/uima/uimacpp/trunk/src/cas/xmltypesystemwriter.cpp
------------------------------------------------------------------------------
    svn:eol-style = native