You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2017/11/08 14:48:40 UTC
svn commit: r1814584 - in
/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core: cr/ resource/
Author: seanfinan
Date: Wed Nov 8 14:48:39 2017
New Revision: 1814584
URL: http://svn.apache.org/viewvc?rev=1814584&view=rev
Log:
Core collection reader updates (refactor)
Added:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/FileReadWriteUtil.java
Modified:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JdbcCollectionReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/LinesFromFileCollectionReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XmiCollectionReaderCtakes.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/LuceneIndexReaderResourceImpl.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/StringIntegerMapResourceImpl.java
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JdbcCollectionReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JdbcCollectionReader.java?rev=1814584&r1=1814583&r2=1814584&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JdbcCollectionReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JdbcCollectionReader.java Wed Nov 8 14:48:39 2017
@@ -30,7 +30,10 @@ import org.apache.uima.resource.Resource
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
@@ -268,17 +271,9 @@ public class JdbcCollectionReader extend
try
{
- // if there's a CAS Initializer, call it
- if (getCasInitializer() != null)
- {
- Reader reader = new StringReader(document);
- getCasInitializer().initializeCas(reader, cas);
- } else
- {
- // No CAS Initiliazer, so set document text ourselves.
+ // No CAS Initializer, so set document text ourselves.
// put document in CAS (assume CAS)
cas.getJCas().setDocumentText(document);
- }
DocumentID docIdAnnot = new DocumentID(cas
.getJCas());
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/LinesFromFileCollectionReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/LinesFromFileCollectionReader.java?rev=1814584&r1=1814583&r2=1814584&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/LinesFromFileCollectionReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/LinesFromFileCollectionReader.java Wed Nov 8 14:48:39 2017
@@ -30,7 +30,9 @@ import org.apache.uima.resource.Resource
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -173,19 +175,9 @@ public class LinesFromFileCollectionRead
iv_logger.debug("id="+id);
iv_logger.debug("text="+text);
- //if there's a CAS Initializer, call it
- if (getCasInitializer() != null)
- {
- Reader reader = new StringReader(text);
- getCasInitializer().initializeCas(reader, cas);
- reader.close();
- }
- else //No CAS Initiliazer, so read file and set document text ourselves
- {
jcas.setDocumentText(text);
- }
-
- //set language if it was explicitly specified as a configuration parameter
+
+ //set language if it was explicitly specified as a configuration parameter
if (iv_language != null)
{
// ((DocumentAnnotation)jcas.getDocumentAnnotationFs()).setLanguage(iv_language);
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextReader.java?rev=1814584&r1=1814583&r2=1814584&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextReader.java Wed Nov 8 14:48:39 2017
@@ -19,9 +19,9 @@
package org.apache.ctakes.core.cr;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.resource.FileReadWriteUtil;
import org.apache.ctakes.typesystem.type.structured.DocumentID;
import org.apache.uima.UimaContext;
-import org.apache.uima.collection.CasInitializer;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
@@ -30,7 +30,8 @@ import org.apache.uima.resource.Resource
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
-import java.io.*;
+import java.io.File;
+import java.io.IOException;
import java.util.Iterator;
import java.util.List;
@@ -51,68 +52,41 @@ public class TextReader extends JCasColl
name = PARAM_FILES,
mandatory = true,
description = "The text files to be loaded")
- private List<File> files;
+ private List<File> _files;
- private Iterator<File> filesIter;
+ private Iterator<File> _filesIter;
- private int completed;
+ private int _completed;
@Override
- public void initialize(UimaContext context) throws ResourceInitializationException {
- super.initialize(context);
- this.filesIter = files.iterator();
- this.completed = 0;
+ public void initialize( final UimaContext context ) throws ResourceInitializationException {
+ super.initialize( context );
+ _filesIter = _files.iterator();
+ _completed = 0;
}
@Override
public Progress[] getProgress() {
- return new Progress[] { new ProgressImpl(this.completed, this.files.size(), Progress.ENTITIES) };
+ return new Progress[]{ new ProgressImpl( _completed, _files.size(), Progress.ENTITIES ) };
}
@Override
public boolean hasNext() throws IOException, CollectionException {
- return this.filesIter.hasNext();
+ return _filesIter.hasNext();
}
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {
- File currentFile = this.filesIter.next();
- String filename = currentFile.getName();
- FileInputStream fileInputStream = new FileInputStream(currentFile);
- InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream);
- BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
-
- CasInitializer casInitializer = getCasInitializer();
-
- if (casInitializer != null)
- {
- casInitializer.initializeCas(bufferedReader, jCas.getCas());
- }
- else //No CAS Initializer, so read file and set document text ourselves
- {
- try
- {
- byte[] contents = new byte[(int)currentFile.length() ];
- fileInputStream.read( contents );
- String text;
- text = new String(contents);
- //put document in CAS (assume CAS)
- jCas.setDocumentText(text);
- }
- finally
- {
- if (fileInputStream != null)
- fileInputStream.close();
- }
-
- }
+ final File currentFile = _filesIter.next();
+ final String filename = currentFile.getName();
+ final String text = FileReadWriteUtil.readText( filename );
+ jCas.setDocumentText( text );
- DocumentID documentIDAnnotation = new DocumentID(jCas);
- documentIDAnnotation.setDocumentID(filename);
+ final DocumentID documentIDAnnotation = new DocumentID( jCas );
+ documentIDAnnotation.setDocumentID( filename );
documentIDAnnotation.addToIndexes();
-
- this.completed += 1;
+ _completed++;
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XmiCollectionReaderCtakes.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XmiCollectionReaderCtakes.java?rev=1814584&r1=1814583&r2=1814584&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XmiCollectionReaderCtakes.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/XmiCollectionReaderCtakes.java Wed Nov 8 14:48:39 2017
@@ -58,8 +58,8 @@ public class XmiCollectionReaderCtakes e
public static final String PARAM_FAILUNKNOWN = "FailOnUnknownType";
private Boolean mFailOnUnknownType;
-
- private ArrayList mFiles;
+
+ private ArrayList<File> mFiles;
private int mCurrentIndex;
@@ -82,8 +82,11 @@ public class XmiCollectionReaderCtakes e
}
// get list of .xmi files in the specified directory
- mFiles = new ArrayList();
+ mFiles = new ArrayList<>();
File[] files = directory.listFiles();
+ if ( files == null ) {
+ return;
+ }
for (int i = 0; i < files.length; i++) {
if (!files[i].isDirectory() && files[i].getName().endsWith(".xmi")) {
mFiles.add(files[i]);
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/FileReadWriteUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/FileReadWriteUtil.java?rev=1814584&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/FileReadWriteUtil.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/FileReadWriteUtil.java Wed Nov 8 14:48:39 2017
@@ -0,0 +1,78 @@
+package org.apache.ctakes.core.resource;
+
+
+import org.apache.log4j.Logger;
+
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/23/2017
+ */
+final public class FileReadWriteUtil {
+
+ private FileReadWriteUtil() {
+ }
+
+ static private final Logger LOGGER = Logger.getLogger( "FileReadWriteUtil" );
+
+ /**
+ * Reading text from a file or resource is done everywhere, but a common implementation is missing from ctakes.
+ *
+ * @param path for file or resource.
+ * @return a single string of text from the file.
+ * @throws IOException if the resource cannot be read.
+ */
+ static public String readText( final String path ) throws IOException {
+ final InputStream stream = FileLocator.getAsStream( path );
+ final StringBuilder sb = new StringBuilder();
+ try ( BufferedReader reader = new BufferedReader( new InputStreamReader( stream ) ) ) {
+ String line;
+ while ( (line = reader.readLine()) != null ) {
+ sb.append( line ).append( "\n" );
+ }
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Reading text from a file or resource is done everywhere, but a common implementation is missing from ctakes.
+ *
+ * @param path for file or resource.
+ * @return a list containing each line of text in the file.
+ * @throws IOException if the resource cannot be read.
+ */
+ static public List<String> readLines( final String path ) throws IOException {
+ final InputStream stream = FileLocator.getAsStream( path );
+ final List<String> lines = new ArrayList<>();
+ try ( BufferedReader reader = new BufferedReader( new InputStreamReader( stream ) ) ) {
+ String line;
+ while ( (line = reader.readLine()) != null ) {
+ lines.add( line );
+ }
+ }
+ return lines;
+ }
+
+ /**
+ * Writing text to a file is done everywhere, but a common implementation is missing from ctakes.
+ *
+ * @param text to be written.
+ * @param filepath for output file.
+ * @throws IOException if the file cannot be written.
+ */
+ static public void writeText( final String text, final String filepath ) throws IOException {
+ final Path path = Paths.get( filepath );
+ try ( Writer writer = Files.newBufferedWriter( path ) ) {
+ writer.write( text );
+ }
+ }
+
+
+}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/LuceneIndexReaderResourceImpl.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/LuceneIndexReaderResourceImpl.java?rev=1814584&r1=1814583&r2=1814584&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/LuceneIndexReaderResourceImpl.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/LuceneIndexReaderResourceImpl.java Wed Nov 8 14:48:39 2017
@@ -18,21 +18,21 @@
*/
package org.apache.ctakes.core.resource;
-import java.io.File;
-
import org.apache.log4j.Logger;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.RAMDirectory;
-
import org.apache.uima.resource.DataResource;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.SharedResourceObject;
import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
-/**
+import java.io.File;
+
+/**
* Oct 2010 - convert to lucene 3.0.2
* @author Mayo Clinic
*/
@@ -55,7 +55,7 @@ public class LuceneIndexReaderResourceIm
String indexDirStr = (String) cps.getParameterValue("IndexDirectory");
try {
- File indexDir = FileLocator.locateFile(indexDirStr);
+ File indexDir = FileLocator.getFile( indexDirStr );
if(!indexDir.exists())
iv_logger.info("indexDir="+indexDirStr+" does not exist!");
@@ -64,15 +64,17 @@ public class LuceneIndexReaderResourceIm
if (useMemoryIndex.booleanValue()) {
- iv_logger.info("Loading Lucene Index into memory: " + indexDir);
+ iv_logger.info("Loading Lucene Index into memory: " + indexDir);
FSDirectory fsd = FSDirectory.open(indexDir);
Directory d = new RAMDirectory(fsd, IOContext.DEFAULT);
- iv_indexReader = IndexReader.open(d);
+// iv_indexReader = IndexReader.open(d);
+ iv_indexReader = DirectoryReader.open( d );
}
else {
iv_logger.info("Loading Lucene Index: " + indexDir);
- FSDirectory fsd = FSDirectory.open(indexDir);
- iv_indexReader = IndexReader.open(fsd);
+ FSDirectory fsd = FSDirectory.open(indexDir);
+// iv_indexReader = IndexReader.open(fsd);
+ iv_indexReader = DirectoryReader.open( fsd );
}
iv_logger.info("Loaded Lucene Index, # docs=" + iv_indexReader.numDocs());
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/StringIntegerMapResourceImpl.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/StringIntegerMapResourceImpl.java?rev=1814584&r1=1814583&r2=1814584&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/StringIntegerMapResourceImpl.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/resource/StringIntegerMapResourceImpl.java Wed Nov 8 14:48:39 2017
@@ -18,6 +18,11 @@
*/
package org.apache.ctakes.core.resource;
+import org.apache.log4j.Logger;
+import org.apache.uima.resource.DataResource;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.SharedResourceObject;
+
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
@@ -26,12 +31,6 @@ import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
-import org.apache.log4j.Logger;
-
-import org.apache.uima.resource.DataResource;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.SharedResourceObject;
-
/**
* Implementation for StringIntegerMapResource interface.
*
@@ -98,7 +97,7 @@ public class StringIntegerMapResourceImp
/**
* Gets a map of the String/Integer values.
*/
- public Map getMap()
+ public Map<String, Integer> getMap()
{
return iv_map;
}