You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ea...@apache.org on 2007/02/03 18:03:51 UTC
svn commit: r503260 - in /incubator/uima/uimacpp/trunk/src/utils: ./
Makefile.am runAECpp.cpp
Author: eae
Date: Sat Feb 3 09:03:50 2007
New Revision: 503260
URL: http://svn.apache.org/viewvc?view=rev&rev=503260
Log:
Initial entry
Added:
incubator/uima/uimacpp/trunk/src/utils/
incubator/uima/uimacpp/trunk/src/utils/Makefile.am (with props)
incubator/uima/uimacpp/trunk/src/utils/runAECpp.cpp (with props)
Added: incubator/uima/uimacpp/trunk/src/utils/Makefile.am
URL: http://svn.apache.org/viewvc/incubator/uima/uimacpp/trunk/src/utils/Makefile.am?view=auto&rev=503260
==============================================================================
--- incubator/uima/uimacpp/trunk/src/utils/Makefile.am (added)
+++ incubator/uima/uimacpp/trunk/src/utils/Makefile.am Sat Feb 3 09:03:50 2007
@@ -0,0 +1,49 @@
+# -*- makefile -*-
+
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+
+# Programs built from this directory; UTILS_BIN is assigned just below.
+bin_PROGRAMS = $(UTILS_BIN)
+
+UTILS_BIN= runAECpp
+
+# Convenience target whose only prerequisite is the utility program(s).
+test_tgt: $(UTILS_BIN)
+
+# Remove the built program and the object files of runAECpp.
+clean:
+ rm -f $(UTILS_BIN) $(runAECpp_OBJECTS)
+
+# Default preprocessor flags: debug symbols and -DDEBUG when the DEBUG_TEST
+# conditional is set, otherwise -O3 with NDEBUG and TRACEOFF defined.
+if DEBUG_TEST
+AM_CPPFLAGS = -g -DDEBUG
+else
+AM_CPPFLAGS = -O3 -DNDEBUG -DTRACEOFF
+endif
+AM_CPPFLAGS += -D_GNU_SOURCE
+
+# Set default for all target_LDADD values
+LDADD = $(UIMA_LIB) $(UIMA_XERCES_LIB) $(UIMA_ICU_LIB) -ldl $(UIMACPP_HOME_LIB)
+
+# runAECpp is built from a single source file and always gets -g -DDEBUG.
+# NOTE(review): automake per-target CPPFLAGS are used instead of AM_CPPFLAGS,
+# so the DEBUG_TEST selection above presumably does not affect runAECpp - confirm.
+runAECpp_SOURCES = runAECpp.cpp
+runAECpp_CPPFLAGS = -g -DDEBUG
+runAECpp_CPPFLAGS += -D_GNU_SOURCE
+
+# Header search path: this directory, the sibling framework and cas source
+# directories, then the include settings supplied through the UIMA_* variables.
+INCLUDES = -I.
+INCLUDES += -I../framework
+INCLUDES += -I../cas
+INCLUDES += $(UIMA_XERCES_INCLUDE)
+INCLUDES += $(UIMA_APR_INCLUDE)
+INCLUDES += $(UIMA_ICU_INCLUDE)
+INCLUDES += $(UIMACPP_HOME_INCLUDE)
Propchange: incubator/uima/uimacpp/trunk/src/utils/Makefile.am
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/uima/uimacpp/trunk/src/utils/runAECpp.cpp
URL: http://svn.apache.org/viewvc/incubator/uima/uimacpp/trunk/src/utils/runAECpp.cpp?view=auto&rev=503260
==============================================================================
--- incubator/uima/uimacpp/trunk/src/utils/runAECpp.cpp (added)
+++ incubator/uima/uimacpp/trunk/src/utils/runAECpp.cpp Sat Feb 3 09:03:50 2007
@@ -0,0 +1,364 @@
+/*------------------------------------------------------------------------
+
+ Test driver that reads text files or XCASs and calls the annotator
+
+-------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------- */
+/* Include dependencies */
+/* ----------------------------------------------------------------------- */
+
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+
+#include "xercesc/framework/LocalFileInputSource.hpp"
+#include "xercesc/util/PlatformUtils.hpp"
+#include "xercesc/util/XMLString.hpp"
+
+#include "uima/pragmas.hpp"
+#include "uima/filename.hpp"
+#include "uima/dirwalk.hpp"
+
+#include "uima/api.hpp"
+#include "uima/xmlwriter.hpp"
+#include "uima/xcasdeserializer.hpp"
+
+using namespace uima;
+
+
+/* ----------------------------------------------------------------------- */
+/* Implementation */
+/* ----------------------------------------------------------------------- */
+
+/* Error-reporting helpers, defined further down in this file.
+ Each one prints a report to stderr and does a hard program exit when the
+ given error id / error info differs from UIMA_ERR_NONE; otherwise it
+ returns without doing anything. */
+static void tafCheckError(TyErrorId utErrorId,
+ const AnalysisEngine & crEngine);
+static void tafCheckError(ErrorInfo const &);
+
+// State shared between main() and process().
+// useSofa: true when a specific Sofa view is to be processed (option -s, or
+// set by process() for a backward compatible CAS); sofaName: name of that Sofa.
+bool useSofa;
+const char* sofaName;
+// xcasInput: true when the input files are in XCAS format (option -x)
+bool xcasInput;
+// Runs the engine on one input file 'in'; XCAS output is written below the
+// directory 'out' when that string is non-empty.
+void process (AnalysisEngine * pEngine, CAS * cas, std::string in, std::string out);
+// Writes outCas as XCAS to "<outfn>/<name of in>_seg_<num>".
+void writeXCAS (CAS & outCas, int num, std::string in, std::string outfn);
+
+/* Print the command line usage summary, one line at a time, to stderr. */
+void tell() {
+  static const char * const usageLines[] = {
+    "Usage: runAECpp UimaCppDescriptor <-x> InputFileOrDir <OutputDir>",
+    " <-s Sofa> <-l LogLevel>",
+    " UimaCppDescriptor Analysis Engine descriptor for a CPP annotator",
+    " InputFileOrDir Input file or directory of files to process",
+    " OutputDir Existing directory for XCAS outputs (optional)",
+    " Options:",
+    " -x Input(s) must be in XCAS format (default is raw text)",
+    " -s Sofa Name of a Sofa to process (input must be an XCAS)",
+    " -l logLevel Set to 0, 1, or 2 for Message, Warning, or Error"
+  };
+  const size_t lineCount = sizeof(usageLines) / sizeof(usageLines[0]);
+
+  /* every line is terminated (and flushed) with endl */
+  for (size_t lineNo = 0; lineNo < lineCount; ++lineNo) {
+    cerr << usageLines[lineNo] << endl;
+  }
+}
+
+/* Program entry point.
+ *
+ * Parses the command line (see tell() for the syntax), creates the UIMACPP
+ * resource manager and the analysis engine described by the descriptor,
+ * then calls process() for the input file or for every file found in the
+ * input directory.
+ *
+ * Returns 0 on success, 1 for a usage error or when a UIMACPP exception
+ * reaches this function, and -1 when the input and output paths are the
+ * same. Engine failures detected by tafCheckError() exit the program with
+ * the UIMACPP error id as exit status.
+ */
+int main(int argc, char * argv[]) {
+
+  int loglevel = -1;
+
+  try {
+
+    /* Access the command line arguments to get the name of the input text. */
+    if (argc < 3) {
+      tell();
+      return 1;
+    }
+    useSofa = false;
+    xcasInput = false;
+    /* input/output dir arg */
+    std::string in;
+    std::string out;
+    const char* cnfg = NULL;
+
+    int index = 0;
+    while (++index < argc) {
+      char* arg = argv[index];
+      if (0 == strcmp(arg, "-x")) {
+        xcasInput = true;
+      } else if (0 == strcmp(arg, "-s")) {
+        /* when the value is missing, index ends up beyond argc; checked below */
+        if ( ++index < argc ) {
+          sofaName = argv[index];
+          useSofa = true;
+        }
+      } else if (0 == strcmp(arg, "-l")) {
+        if ( ++index < argc ) {
+          loglevel = atoi(argv[index]);
+          if (loglevel < LogStream::EnMessage) {
+            cerr << "LogLevel less than minimum value (Message) = " << LogStream::EnMessage << endl;
+            return 1;
+          }
+          if (loglevel > LogStream::EnError) {
+            cerr << "LogLevel greater than maximum value (Error) = " << LogStream::EnError << endl;
+            return 1;
+          }
+        }
+      } else { //one of the standard params - whichever we haven't read yet
+        if (cnfg == NULL) {
+          cnfg = arg;
+        } else if (in.length() == 0) {
+          in.append(arg);
+        } else if (out.length() == 0) {
+          out.append(arg);
+        }
+      }
+    } //while
+
+    if (in.length() == 0 || index > argc) { // Too few args or no arg after -s or -l
+      tell();
+      return 1;
+    }
+
+    if (out == in) {
+      cout << "runAECpp: ERROR: input and output file paths are the same " << endl;
+      return -1;
+    }
+
+    /* Create/link up to a UIMACPP resource manager instance (singleton) */
+    (void) ResourceManager::createInstance("runAECpp");
+
+    if (loglevel >= 0) {
+      ResourceManager::getInstance().setLoggingLevel((LogStream::EnEntryType)loglevel);
+    }
+
+    TyErrorId utErrorId; // Variable to store UIMACPP return codes
+    ErrorInfo errorInfo; // Variable to stored detailed error info
+    /* Initialize engine with filename of config-file */
+    AnalysisEngine * pEngine =
+      Framework::createAnalysisEngine(cnfg, errorInfo);
+    tafCheckError(errorInfo);
+
+    /* Get a new CAS */
+    CAS* cas = pEngine->newCAS();
+
+    /* process input xcas */
+    util::DirectoryWalk dirwalker(in.c_str());
+    if (dirwalker.isValid()) {
+      cout << "runAECpp::processing all files in directory: " << in.c_str() << endl;
+      util::Filename infile(in.c_str(),"FilenamePlaceHolder");
+      while (dirwalker.isValid()) {
+        /* only plain files are processed; other directory entries are skipped */
+        if ( dirwalker.isFile() ) {
+          infile.setNewName(dirwalker.getNameWithoutPath());
+          std::string afile(infile.getAsCString());
+
+          //process the cas
+          process(pEngine,cas,afile, out);
+
+          //reset the cas
+          cas->reset();
+        }
+        //get the next xcas file in the directory
+        dirwalker.setToNext();
+      }
+    } else {
+      //process the cas
+      process(pEngine,cas, in, out);
+    }
+    /* call collectionProcessComplete; a failure is reported and ends the run */
+    utErrorId = pEngine->collectionProcessComplete();
+    tafCheckError(utErrorId, *pEngine);
+
+    /* Free annotator */
+    utErrorId = pEngine->destroy();
+    tafCheckError(utErrorId, *pEngine);
+
+    delete cas;
+    delete pEngine;
+  } catch (Exception & e) {
+    /* caught by reference: no copy, no slicing of derived exception types */
+    cout << "runAECpp " << e << endl;
+    /* do not fall through to the success message and exit status */
+    return 1;
+  }
+  /* If we got this far everything went OK */
+  cout << "runAECpp: processing finished sucessfully! " << endl;
+
+  return(0);
+}
+
+
+
+/* Little helper routine to check and report errors.
+ This routine just does a hard program exit for any failure!
+*/
+static void tafCheckError(TyErrorId utErrorId,
+ const AnalysisEngine & crEngine) {
+ if (utErrorId != UIMA_ERR_NONE) {
+ cerr << "runAECpp:" << endl;
+ cerr << " Error number : "
+ << utErrorId << endl;
+ cerr << " Error string : "
+ << AnalysisEngine::getErrorIdAsCString(utErrorId) << endl;
+ const TCHAR * errStr = crEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr();
+ if (errStr != NULL)
+ cerr << " Last logged message : " << errStr << endl;
+ exit((int)utErrorId);
+ }
+}
+
+/* Similar routine as above just with error info objects instead of err-ids.
+ This routine just does a hard program exit for any failure!
+*/
+static void tafCheckError(ErrorInfo const & errInfo) {
+ if (errInfo.getErrorId() != UIMA_ERR_NONE) {
+ cerr << "runAECpp:" << endl
+ << " Error string : "
+ << AnalysisEngine::getErrorIdAsCString(errInfo.getErrorId()) << endl
+ << " UIMACPP Error info:" << endl
+ << errInfo << endl;
+ exit((int)errInfo.getErrorId());
+ }
+}
+
+/* Run the analysis engine over one input file.
+ *
+ *   pEngine  analysis engine to call
+ *   cas      CAS that receives the input (main() resets it between files)
+ *   in       path of the input file: an XCAS when the global xcasInput is
+ *            set, otherwise a text file that is decoded as utf-8
+ *   outfn    output directory; nothing is written when it is empty
+ *
+ * Every CAS returned by the engine is written with writeXCAS() and then
+ * released; afterwards the input CAS itself is written to
+ * "<outfn>/<name of in>".
+ *
+ * UIMACPP exceptions are caught and reported on stderr. An unreadable
+ * input file, an unknown Sofa or an output file that cannot be opened
+ * ends the program.
+ */
+void process (AnalysisEngine * pEngine, CAS * cas, std::string in, std::string outfn) {
+  cout << endl << "runAECpp::processing " << in << endl;
+  try {
+    if (xcasInput) {
+      /* initialize from the xcas */
+      XMLCh * xmlPath = XMLString::transcode(in.c_str());
+      LocalFileInputSource fileIS(xmlPath);
+      /* transcode() returns an allocated string that the caller must free;
+         the input source stores its own copy of the path */
+      XMLString::release(&xmlPath);
+      XCASDeserializer::deserialize(fileIS, *cas);
+    } else {
+      /* read as text file and set document text of default view */
+      FILE * pFile = fopen(in.c_str(),"rb");
+      if (pFile == NULL) {
+        cerr << "RunAECpp: Error reading file " << in << endl;
+        exit(-1);
+      }
+
+      /* the file size determines the buffer size, so stat() must succeed */
+      struct stat fileInfo;
+      if (stat(in.c_str(), &fileInfo) != 0) {
+        fclose(pFile);
+        cerr << "RunAECpp: Error reading file " << in << endl;
+        exit(-1);
+      }
+
+      /* a std::string owns the buffer, so it is freed on every path,
+         including when an exception is thrown below */
+      std::string buffer(static_cast<size_t>(fileInfo.st_size), '\0');
+
+      /* read the file */
+      size_t numread = 0;
+      if (!buffer.empty()) {
+        numread = fread(&buffer[0], 1, buffer.size(), pFile);
+      }
+      fclose(pFile);
+
+      /* convert to unicode and set tcas document text */
+      UnicodeString ustrInputText(buffer.data(), (int32_t)numread, "utf-8");
+      cas->setDocumentText(UnicodeStringRef(ustrInputText));
+    }
+
+    // Is the input a tcas?
+    if (!useSofa && cas->isBackwardCompatibleCas()) {
+      useSofa = true;
+      sofaName = CAS::NAME_DEFAULT_TEXT_SOFA;
+    }
+
+    /* Process either the view of the requested Sofa or the CAS itself */
+    CAS * casToProcess = cas;
+    if (useSofa) {
+      SofaFS mySofa = cas->getSofa(pEngine->getAnnotatorContext().mapToSofaID(sofaName));
+      if (!mySofa.isValid()) {
+        cerr << "runAECpp:" << endl
+             << " Specified Sofa named " << sofaName
+             << " not found in the XCAS file" << endl;
+        exit(99);
+      }
+      casToProcess = cas->getView(mySofa);
+    }
+
+    CASIterator casIter = pEngine->processAndOutputNewCASes(*casToProcess);
+    int i=0;
+    while (casIter.hasNext()) {
+      i++;
+      CAS & outCas = casIter.next();
+
+      //write out xcas
+      if (outfn.length() > 0) {
+        writeXCAS(outCas, i, in, outfn);
+      }
+
+      //release the CAS
+      pEngine->getAnnotatorContext().releaseCAS(outCas);
+
+      cout << "new Cas " << i << endl;
+    }
+
+    if (outfn.length() > 0) {
+      util::Filename infile((TCHAR*) in.c_str());
+
+      /* outfn is a by-value copy, so extending it does not affect the caller */
+      outfn.append("/");
+      outfn.append(infile.getName());
+
+      //open a file stream for output xcas
+      ofstream file;
+      file.open (outfn.c_str(), ios::out | ios::binary);
+      if ( !file ) {
+        cerr << "runAECpp: Error opening output xcas: " << outfn.c_str() << endl;
+        exit(99);
+      }
+
+      //serialize the input cas
+      cout << "runAECpp: write out xcas " << outfn << endl;
+      XCASWriter writer(*cas, true);
+      writer.write(file);
+      file.close();
+    }
+
+  } catch (Exception & e) {
+    /* caught by reference: no copy, no slicing of derived exception types */
+    ErrorInfo errInfo = e.getErrorInfo();
+    cerr << "runAECPP::Error " << errInfo.getErrorId() << " " << errInfo.getMessage() << endl;
+    cerr << errInfo << endl;
+  }
+}
+
+/* Serialize outCas in XCAS format to "<outfn>/<name of in>_seg_<num>".
+ * Exits the program with status 99 when the output file cannot be opened.
+ */
+void writeXCAS (CAS & outCas, int num, std::string in, std::string outfn) {
+
+  /* textual form of the segment number */
+  stringstream segmentNumber;
+  segmentNumber << num;
+
+  /* assemble the path of the output file */
+  util::Filename inputName((TCHAR*) in.c_str());
+  std::string segmentPath(outfn.c_str());
+  segmentPath += "/";
+  segmentPath.append(inputName.getName());
+  segmentPath += "_seg_";
+  segmentPath += segmentNumber.str();
+
+  /* open the output stream in binary mode */
+  ofstream xcasStream(segmentPath.c_str(), ios::out | ios::binary);
+  if (xcasStream.fail()) {
+    cerr << "Error opening output xcas: " << segmentPath.c_str() << endl;
+    exit(99);
+  }
+
+  /* serialize the cas */
+  cout << "write out xcas " << segmentPath << endl;
+  XCASWriter xcasWriter(outCas, true);
+  xcasWriter.write(xcasStream);
+  xcasStream.close();
+}
+
+/* <EOF> */
+
Propchange: incubator/uima/uimacpp/trunk/src/utils/runAECpp.cpp
------------------------------------------------------------------------------
svn:eol-style = native