You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ma...@apache.org on 2012/12/21 17:48:16 UTC

svn commit: r1425020 - in /incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core: ae/DocumentIdPrinterAnalysisEngine.java cr/XMIReader.java util/CtakesFileNamer.java

Author: mattcoarr
Date: Fri Dec 21 16:48:16 2012
New Revision: 1425020

URL: http://svn.apache.org/viewvc?rev=1425020&view=rev
Log:
added to ctakes-core:
- XMIReader
- DocumentIdPrinterAnalysisEngine (prints doc name as each document is executed in a pipeline)
- CtakesFileNamer (used by an XWriter to use ctakes document id for naming output files)

Added:
    incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XMIReader.java
    incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java

Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java?rev=1425020&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java (added)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java Fri Dec 21 16:48:16 2012
@@ -0,0 +1,24 @@
+package org.apache.ctakes.core.ae;
+
+
+import java.util.logging.Logger;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+
+/**
+ * Analysis engine that reports the document id of every CAS it processes, both through
+ * java.util.logging and on standard output.
+ */
+public class DocumentIdPrinterAnalysisEngine extends JCasAnnotator_ImplBase
+{
+  /** Logger named after this class; receives one INFO record per processed CAS. */
+  protected final Logger logger = Logger.getLogger(DocumentIdPrinterAnalysisEngine.class.getName());
+
+  /**
+   * Looks up the document id of the given CAS and emits it to the logger and to stdout.
+   *
+   * @param jcas the CAS whose document id is reported
+   * @throws AnalysisEngineProcessException declared by the overridden method
+   */
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException
+  {
+    // Concatenation renders a null id as "null", exactly like the %s conversion would.
+    final String report =
+        "##### current file document id: \"" + DocumentIDAnnotationUtil.getDocumentID(jcas) + "\"";
+    logger.info(report);
+    System.out.println(report);
+  }
+
+}

Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XMIReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XMIReader.java?rev=1425020&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XMIReader.java (added)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XMIReader.java Fri Dec 21 16:48:16 2012
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.core.cr;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+import org.uimafit.component.JCasCollectionReader_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.xml.sax.SAXException;
+
+/**
+ * UIMA CollectionReader that reads in CASes from XMI files.
+ */
+public class XMIReader extends JCasCollectionReader_ImplBase {
+
+  /** Name of the configuration parameter that carries the list of XMI files to read. */
+  public static final String PARAM_FILES = "files";
+
+  @ConfigurationParameter(
+      name = PARAM_FILES,
+      mandatory = true,
+      description = "The XMI files to be loaded")
+  private List<File> files;
+
+  /** Cursor over {@link #files}; created in {@link #initialize(UimaContext)}. */
+  private Iterator<File> filesIter;
+
+  /** Number of files that have been deserialized successfully so far. */
+  private int completed;
+
+  /**
+   * Initializes the reader and resets the iteration state.
+   *
+   * @param context the UIMA context handed to the superclass initializer
+   * @throws ResourceInitializationException if superclass initialization fails
+   */
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+    // NOTE(review): relies on "files" being populated by the time super.initialize returns.
+    this.filesIter = files.iterator();
+    this.completed = 0;
+  }
+
+  /**
+   * Reports progress as the number of files read out of the total number of configured files.
+   *
+   * @return a single-element array with an entity-based progress measure
+   */
+  @Override
+  public Progress[] getProgress() {
+    return new Progress[] { new ProgressImpl(this.completed, this.files.size(), Progress.ENTITIES) };
+  }
+
+  /**
+   * @return true while there are configured files that have not been handed out yet
+   */
+  @Override
+  public boolean hasNext() throws IOException, CollectionException {
+    return this.filesIter.hasNext();
+  }
+
+  /**
+   * Deserializes the next XMI file into the given CAS.
+   *
+   * @param jCas the CAS that receives the deserialized content
+   * @throws IOException if the file cannot be opened, read, or closed
+   * @throws CollectionException if the XMI content cannot be parsed
+   */
+  @Override
+  public void getNext(JCas jCas) throws IOException, CollectionException {
+    FileInputStream inputStream = new FileInputStream(this.filesIter.next());
+    try {
+      XmiCasDeserializer.deserialize(new BufferedInputStream(inputStream), jCas.getCas());
+    } catch (SAXException e) {
+      throw new CollectionException(e);
+    } finally {
+      // Close on every path so a failed deserialization does not leak the file handle.
+      inputStream.close();
+    }
+    // Only counted when deserialization succeeded.
+    this.completed += 1;
+  }
+
+}
\ No newline at end of file

Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java?rev=1425020&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java (added)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java Fri Dec 21 16:48:16 2012
@@ -0,0 +1,87 @@
+package org.apache.ctakes.core.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.uima.UimaContext;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.uimafit.component.initialize.ConfigurationParameterInitializer;
+import org.uimafit.component.xwriter.XWriterFileNamer;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.ConfigurationParameterFactory;
+import org.uimafit.factory.initializable.Initializable;
+
+/**
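+ * An implementation of XWriterFileNamer that names each output file after the cTAKES document
+ * id of the CAS, wrapped in an optional prefix and suffix. When the document id is null or
+ * empty, an incrementing counter is used in its place.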
+ * 
+ * @author Philip Ogren
+ */
+
+public class CtakesFileNamer implements XWriterFileNamer, Initializable {
+
+        /**
+         * Name of the configuration parameter holding the fixed text placed in front of every
+         * returned file name.
+         */
+        public static final String PARAM_PREFIX = ConfigurationParameterFactory
+                        .createConfigurationParameterName(CtakesFileNamer.class, "prefix");
+        @ConfigurationParameter(description = "specify a prefix that is prepended to all returned file names", defaultValue="")
+        private String prefix;
+
+        /**
+         * Name of the configuration parameter holding the fixed text placed after every returned
+         * file name.
+         */
+        public static final String PARAM_SUFFIX = ConfigurationParameterFactory
+                        .createConfigurationParameterName(CtakesFileNamer.class, "suffix");
+        @ConfigurationParameter(description = "specify a suffix that is appended to all returned file names", defaultValue="")
+        private String suffix;
+
+        /** Fallback counter; consumed only for a CAS whose document id is null or empty. */
+        int i = 1;
+
+        /**
+         * Builds the output file name for a CAS as prefix + base + suffix, where the base is the
+         * document id, or the current counter value when the document id is null or empty.
+         *
+         * @param jcas the CAS whose document id supplies the base name
+         * @return the assembled file name
+         */
+        public String nameFile(JCas jcas)
+        {
+          final String documentId = DocumentIDAnnotationUtil.getDocumentID(jcas);
+          final boolean hasDocumentId = documentId != null && !documentId.isEmpty();
+
+          // The ternary keeps the increment lazy: the counter advances only on the fallback path.
+          final String baseName = hasDocumentId ? documentId : String.valueOf(i++);
+
+          return emptyIfNull(prefix) + baseName + emptyIfNull(suffix);
+        }
+
+        /** Maps a null affix to the empty string so it contributes nothing to the name. */
+        private static String emptyIfNull(String affix)
+        {
+          return affix == null ? "" : affix;
+        }
+
+        /**
+         * Fills in the annotated configuration parameters of this instance from the context.
+         *
+         * @param context the UIMA context supplying the parameter values
+         * @throws ResourceInitializationException if parameter initialization fails
+         */
+        public void initialize(UimaContext context) throws ResourceInitializationException {
+                ConfigurationParameterInitializer.initialize(this, context);
+        }
+}
\ No newline at end of file