You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ma...@apache.org on 2012/12/21 17:48:16 UTC
svn commit: r1425020 - in
/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core:
ae/DocumentIdPrinterAnalysisEngine.java cr/XMIReader.java
util/CtakesFileNamer.java
Author: mattcoarr
Date: Fri Dec 21 16:48:16 2012
New Revision: 1425020
URL: http://svn.apache.org/viewvc?rev=1425020&view=rev
Log:
added to ctakes-core:
- XMIReader
- DocumentIdPrinterAnalysisEngine (prints doc name as each document is executed in a pipeline)
- CtakesFileNamer (used by an XWriter to use ctakes document id for naming output files)
Added:
incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java
incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XMIReader.java
incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java
Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java?rev=1425020&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java (added)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java Fri Dec 21 16:48:16 2012
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.core.ae;
+
+import java.util.logging.Logger;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+
+/**
+ * Analysis engine that reports the document id of every CAS it processes.
+ * <p>
+ * The same message is sent to this class's java.util.logging logger at INFO
+ * level and printed on standard output.
+ */
+public class DocumentIdPrinterAnalysisEngine extends JCasAnnotator_ImplBase
+{
+  /** Logger named after this class; receives one INFO record per processed CAS. */
+  protected final Logger logger = Logger.getLogger(DocumentIdPrinterAnalysisEngine.class.getName());
+
+  /**
+   * Looks up the document id of the given CAS and reports it.
+   *
+   * @param jcas the CAS whose document id is reported
+   * @throws AnalysisEngineProcessException part of the declared signature; this method contains no explicit throw
+   */
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException
+  {
+    String documentId = DocumentIDAnnotationUtil.getDocumentID(jcas);
+    String logMessage = String.format("##### current file document id: \"%s\"", documentId);
+    logger.info(logMessage);
+    // The identical line is also written to stdout, in addition to the logger.
+    System.out.println(logMessage);
+  }
+
+}
Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XMIReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XMIReader.java?rev=1425020&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XMIReader.java (added)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XMIReader.java Fri Dec 21 16:48:16 2012
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.core.cr;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+import org.uimafit.component.JCasCollectionReader_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.xml.sax.SAXException;
+
+/**
+ * UIMA CollectionReader that reads in CASes from XMI files.
+ * <p>
+ * The files configured under {@link #PARAM_FILES} are consumed in list order,
+ * one file per call to {@link #getNext(JCas)}.
+ */
+public class XMIReader extends JCasCollectionReader_ImplBase {
+
+  /** Name of the mandatory configuration parameter that lists the XMI files to load. */
+  public static final String PARAM_FILES = "files";
+
+  @ConfigurationParameter(
+      name = PARAM_FILES,
+      mandatory = true,
+      description = "The XMI files to be loaded")
+  private List<File> files;
+
+  /** Cursor over {@link #files}; each call to getNext consumes one element. */
+  private Iterator<File> filesIter;
+
+  /** Number of files deserialized without error so far; used for progress reporting. */
+  private int completed;
+
+  /**
+   * Runs the superclass initialization, then starts iteration at the first
+   * configured file and resets the completed-file counter.
+   */
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+    this.filesIter = files.iterator();
+    this.completed = 0;
+  }
+
+  /**
+   * Reports progress as one entry: files completed so far out of the number of
+   * configured files, measured in {@code Progress.ENTITIES}.
+   */
+  @Override
+  public Progress[] getProgress() {
+    return new Progress[] { new ProgressImpl(this.completed, this.files.size(), Progress.ENTITIES) };
+  }
+
+  /**
+   * @return {@code true} while the file iterator still has elements
+   */
+  @Override
+  public boolean hasNext() throws IOException, CollectionException {
+    return this.filesIter.hasNext();
+  }
+
+  /**
+   * Deserializes the next XMI file into the given CAS.
+   * <p>
+   * The file stream is closed in a {@code finally} block, so the handle is
+   * released even when deserialization fails part-way through.
+   *
+   * @param jCas the CAS to populate from the next file
+   * @throws IOException if the file cannot be opened, read or closed
+   * @throws CollectionException if the XMI cannot be parsed; wraps the SAXException
+   */
+  @Override
+  public void getNext(JCas jCas) throws IOException, CollectionException {
+    FileInputStream inputStream = new FileInputStream(this.filesIter.next());
+    try {
+      XmiCasDeserializer.deserialize(new BufferedInputStream(inputStream), jCas.getCas());
+    } catch (SAXException e) {
+      throw new CollectionException(e);
+    } finally {
+      // Previously close() was skipped whenever deserialize threw, leaking the file handle.
+      inputStream.close();
+    }
+    this.completed += 1; // reached only when the file was read without an exception
+  }
+
+}
\ No newline at end of file
Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java?rev=1425020&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java (added)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/CtakesFileNamer.java Fri Dec 21 16:48:16 2012
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.core.util;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.uima.UimaContext;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.uimafit.component.initialize.ConfigurationParameterInitializer;
+import org.uimafit.component.xwriter.XWriterFileNamer;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.ConfigurationParameterFactory;
+import org.uimafit.factory.initializable.Initializable;
+
+/**
+ * XWriterFileNamer that names each output file after the cTAKES document id of
+ * the CAS being written.
+ * <p>
+ * A name is the configured prefix, then the document id, then the configured
+ * suffix. When the document id is null or empty, a counter that starts at 1 and
+ * grows by one each time it is used takes its place.
+ *
+ * @author Philip Ogren
+ */
+public class CtakesFileNamer implements XWriterFileNamer, Initializable {
+
+  /**
+   * The parameter name for the configuration parameter that specifies a fixed prefix for all
+   * returned file names.
+   */
+  public static final String PARAM_PREFIX = ConfigurationParameterFactory
+      .createConfigurationParameterName(CtakesFileNamer.class, "prefix");
+  @ConfigurationParameter(description = "specify a prefix that is prepended to all returned file names", defaultValue="")
+  private String prefix;
+
+  /**
+   * The parameter name for the configuration parameter that specifies a fixed suffix for all
+   * returned file names.
+   */
+  public static final String PARAM_SUFFIX = ConfigurationParameterFactory
+      .createConfigurationParameterName(CtakesFileNamer.class, "suffix");
+  @ConfigurationParameter(description = "specify a suffix that is appended to all returned file names", defaultValue="")
+  private String suffix;
+
+  /** Fallback counter; consumed (and incremented) only when a CAS has no usable document id. */
+  int i = 1;
+
+  /**
+   * Builds the output file name for the given CAS.
+   * <p>
+   * NOTE(review): the document id is appended verbatim; if ids can contain path
+   * separators the returned name will too -- confirm callers are fine with that.
+   *
+   * @param jcas the CAS about to be written
+   * @return prefix + (document id, or the counter value when the id is null or empty) + suffix
+   */
+  public String nameFile(JCas jcas) {
+    String sourceFileName = DocumentIDAnnotationUtil.getDocumentID(jcas);
+    StringBuilder b = new StringBuilder();
+    // Only null needs guarding (append(null) writes "null"); an empty string appends nothing.
+    if (prefix != null) {
+      b.append(prefix);
+    }
+
+    if (sourceFileName != null && !sourceFileName.isEmpty()) {
+      b.append(sourceFileName);
+    } else {
+      b.append(i++);
+    }
+
+    if (suffix != null) {
+      b.append(suffix);
+    }
+
+    return b.toString();
+  }
+
+  /**
+   * Hands this instance and the context to uimaFIT's ConfigurationParameterInitializer,
+   * which is expected to set the annotated prefix and suffix fields.
+   */
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    ConfigurationParameterInitializer.initialize(this, context);
+  }
+}
\ No newline at end of file