You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2022/11/21 17:50:20 UTC
svn commit: r1905443 [2/2] - in /ctakes/trunk: ./ ctakes-assertion/ ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/ ctakes-core/desc/analysis_engine/ ctakes-core/src/main/java/org/apache/ctakes/core/util/external/ ctakes-dictionary-...
Added: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/cr/LetterColumnReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/cr/LetterColumnReader.java?rev=1905443&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/cr/LetterColumnReader.java (added)
+++ ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/cr/LetterColumnReader.java Mon Nov 21 17:50:20 2022
@@ -0,0 +1,279 @@
+package org.apache.ctakes.examples.cr;
+
+import org.apache.ctakes.core.cr.AbstractFileTreeReader;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.pipeline.ProgressManager;
+import org.apache.ctakes.core.util.Pair;
+import org.apache.ctakes.core.util.StringUtil;
+import org.apache.ctakes.core.util.doc.JCasBuilder;
+import org.apache.ctakes.core.util.doc.TextBySectionBuilder;
+import org.apache.ctakes.core.util.regex.RegexSpanFinder;
+import org.apache.log4j.Logger;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+@PipeBitInfo(
+ name = "LetterColumnReader",
+ description = "Build Patient document text from columnar Letter text.",
+ role = PipeBitInfo.Role.READER
+)
+public class LetterColumnReader extends AbstractFileTreeReader {
+
+ static private final Logger LOGGER = Logger.getLogger( "LetterColumnReader" );
+
+ static private final Pattern LETTER_PATTERN = Pattern.compile( "\\bLetter [0-9]+\\|" );
+
+ private String _fileId = "";
+ private int _letterTotal = 0;
+ private int _letterCount = 0;
+ private final List<Letter> _fileLetters = new ArrayList<>();
+ private int _fileLetterIndex = 0;
+
+ private JCasBuilder _jCasBuilder = new JCasBuilder();
+
+
+ /**
+ * Gets the number of letters parsed so far from the files that have been read.
+ * The value is a running total: it is increased each time a file is parsed by {@link #readFile},
+ * so it only equals the size of the whole collection once every file has been read.
+ *
+ * @return the current value of the letter total counter.
+ */
+ @Override
+ public int getNoteCount() {
+ return _letterTotal;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * A next document exists when letters of the current file have not been returned yet,
+ * or when files remain that have not been read.
+ */
+ @Override
+ public boolean hasNext() {
+ final boolean lettersRemain = _fileLetterIndex < _fileLetters.size();
+ if ( lettersRemain ) {
+ return true;
+ }
+ if ( getCurrentIndex() < getFiles().size() ) {
+ return true;
+ }
+ // Nothing is left to read: report the final letter total to the progress manager.
+ ProgressManager.getInstance()
+ .updateProgress( _letterTotal );
+ return false;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Places the next unreturned letter of the current file in the jcas.
+ * When the current file has no unreturned letter, following files are read until one yields a letter.
+ *
+ * @param jcas jcas to populate with the next letter.
+ * @throws IOException if a file cannot be read, or if no remaining file contains a letter.
+ * @throws CollectionException declared by the reader contract.
+ */
+ @Override
+ public void getNext( final JCas jcas ) throws IOException, CollectionException {
+ // Advance through the files with a loop. A file may yield no letter at all, and recursing once
+ // per such file ended with an IndexOutOfBoundsException when the last files held no letters.
+ while ( _fileLetterIndex >= _fileLetters.size() ) {
+ final int currentFileIndex = getCurrentIndex();
+ if ( currentFileIndex >= getFiles().size() ) {
+ throw new IOException( "No letters remain to be read. Last file read: " + _fileId );
+ }
+ final File file = getFiles().get( currentFileIndex );
+ setCurrentIndex( currentFileIndex + 1 );
+ _fileId = createDocumentID( file, getValidExtensions() );
+ readFile( jcas, file );
+ }
+ final Letter letter = _fileLetters.get( _fileLetterIndex );
+ _fileLetterIndex++;
+ _letterCount++;
+ ProgressManager.getInstance()
+ .updateProgress( _letterCount );
+ // The document id combines the id of the file with the id of the letter.
+ _jCasBuilder.setDocId( _fileId + "_" + letter._id )
+ .setDocTime( letter._date )
+ .rebuild( jcas );
+ final TextBySectionBuilder builder = new TextBySectionBuilder();
+ letter._sections
+ .forEach( p -> builder.addSection( p.getValue1(), p.getValue2() ) );
+ builder.populate( jcas );
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Progress is reported as the number of letters returned out of the number of letters parsed so far.
+ */
+ @Override
+ public Progress[] getProgress() {
+ final Progress letterProgress = new ProgressImpl( _letterCount, _letterTotal, Progress.ENTITIES );
+ return new Progress[]{ letterProgress };
+ }
+
+
+ /**
+ * Places Document Text (and other information) in JCas.
+ *
+ * @param jCas unpopulated jcas data container.
+ * @param file file to be read.
+ * @throws IOException should anything bad happen.
+ */
+ protected void readFile( JCas jCas, File file ) throws IOException {
+ // Read the file, building a document only using lines preceded by "Text:"
+ LOGGER.info( "Reading File " + file.getPath() );
+ final String fileText = readByBuffer( file );
+ _fileLetters.clear();
+ _fileLetterIndex = 0;
+ if ( !fileText.isEmpty() ) {
+ _fileLetters.addAll( readLetters( fileText ) );
+ _letterTotal += _fileLetters.size();
+ }
+ _jCasBuilder = getJCasBuilder( file ).setDocType( "Letter" )
+ .nullDocText();
+ ProgressManager.getInstance()
+ .updateProgress( _letterCount, _letterTotal );
+ LOGGER.info( "Parsed " + _fileLetters.size() + " letters" );
+ }
+
+
+ /**
+ * Splits the raw text into letter lines and groups consecutive lines with the same id into letters.
+ * A letter line starts at each match of the letter pattern and runs up to the next match,
+ * or to the end of the text for the last match.
+ *
+ * @param rawText complete raw text as read from file.
+ * @return letters parsed from file text. Letters without any section content are left out.
+ * @throws IOException if things go wrong.
+ */
+ static private List<Letter> readLetters( final String rawText ) throws IOException {
+ final List<Integer> letterStarts;
+ try ( RegexSpanFinder finder = new RegexSpanFinder( LETTER_PATTERN ) ) {
+ letterStarts = finder.findSpans( rawText )
+ .stream()
+ .map( Pair::getValue1 )
+ .collect( Collectors.toList() );
+ } catch ( IllegalArgumentException iaE ) {
+ // Keep the original exception as the cause so that its stack trace is not lost.
+ throw new IOException( "Illegal Argument " + iaE.getMessage(), iaE );
+ }
+ if ( letterStarts.isEmpty() ) {
+ return Collections.emptyList();
+ }
+ final List<Letter> letters = new ArrayList<>();
+ Letter currentLetter = new Letter();
+ final int lineCount = letterStarts.size();
+ for ( int i = 0; i < lineCount; i++ ) {
+ final int begin = letterStarts.get( i );
+ final int end = i + 1 < lineCount ? letterStarts.get( i + 1 ) : rawText.length();
+ final Letter newOrCurrent = handleLetterLine( currentLetter, rawText.substring( begin, end ) );
+ // handleLetterLine returns a different instance only when the line begins a new letter.
+ // Compare instances, not ids: the id is still null while only malformed lines have been
+ // seen, and calling equals on it threw a NullPointerException.
+ if ( newOrCurrent != currentLetter ) {
+ if ( currentLetter.hasInfo() ) {
+ letters.add( currentLetter );
+ }
+ currentLetter = newOrCurrent;
+ }
+ }
+ // The letter still being populated when the text ends has not been added yet.
+ if ( currentLetter.hasInfo() ) {
+ letters.add( currentLetter );
+ }
+ return letters;
+ }
+
+
+ /**
+ * Adds a letter line to the given letter, or to a fresh letter when the line belongs to another letter.
+ *
+ * @param letter the letter currently being populated with sections.
+ * @param line a block of text representing a letter line.
+ * @return the letter provided with the text processed OR a new letter with the text processed.
+ */
+ static private Letter handleLetterLine( final Letter letter, final String line ) {
+ if ( letter.addLine( line ) != LineType.NEXT_LETTER ) {
+ return letter;
+ }
+ // The line carries a different id. A fresh letter has no id yet, so it accepts the line as its own.
+ final Letter nextLetter = new Letter();
+ nextLetter.addLine( line );
+ return nextLetter;
+ }
+
+
+ /**
+ * Reads file using buffered input stream.
+ * All bytes are collected first and decoded once. Decoding each 8KB chunk on its own corrupts
+ * any multi-byte character that happens to be split across two chunks.
+ *
+ * @param file file to read
+ * @return text in file, decoded with the valid encoding if there is one, else the platform default.
+ * @throws IOException if the file could not be read
+ */
+ private String readByBuffer( final File file ) throws IOException {
+ final String encoding = getValidEncoding();
+ // Use 8KB as the default buffer size
+ final byte[] buffer = new byte[ 8192 ];
+ final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+ try ( final InputStream inputStream = new BufferedInputStream( new FileInputStream( file ), buffer.length ) ) {
+ int length = inputStream.read( buffer );
+ while ( length >= 0 ) {
+ bytes.write( buffer, 0, length );
+ length = inputStream.read( buffer );
+ }
+ } catch ( FileNotFoundException fnfE ) {
+ throw new IOException( fnfE );
+ }
+ if ( bytes.size() == 0 ) {
+ // Nothing to decode. As before, an empty file never touches the encoding.
+ return "";
+ }
+ if ( encoding != null && !encoding.isEmpty() && !UNKNOWN.equals( encoding ) ) {
+ return bytes.toString( encoding );
+ }
+ return bytes.toString();
+ }
+
+
+ /**
+ * Result of offering a letter line to a Letter.
+ */
+ private enum LineType {
+ // The line failed validation: wrong number of columns, or a blank id or date column.
+ MALFORMED,
+ // The line is valid for the letter but its content column is empty, so no section was added.
+ EMPTY,
+ // The line was added to the letter as a section.
+ SECTION,
+ // The line carries an id that differs from the id of the letter it was offered to.
+ NEXT_LETTER
+ }
+
+ /**
+ * Collects the sections of one letter from the column lines that share its id.
+ */
+ static private final class Letter {
+
+ private String _id;
+ private String _date;
+ private final List<Pair<String>> _sections = new ArrayList<>();
+
+ /**
+ * Offers a '|' delimited line to this letter.
+ * The first column is the letter id and the seventh column is the letter date.
+ * The third and fourth columns are stored together as a section.
+ * NOTE(review): presumably these are the section name and section text - confirm against the data.
+ *
+ * @param line a block of text representing a letter line.
+ * @return what was done with the line.
+ */
+ private LineType addLine( final String line ) {
+ final String[] columns = StringUtil.fastSplit( line, '|' );
+ if ( !isLineValid( columns ) ) {
+ return LineType.MALFORMED;
+ }
+ final String lineId = columns[ 0 ];
+ if ( _id != null && !_id.equals( lineId ) ) {
+ // The line belongs to some other letter.
+ return LineType.NEXT_LETTER;
+ }
+ if ( _id == null ) {
+ // First valid line offered to this letter: adopt its id.
+ _id = lineId;
+ }
+ final String content = columns[ 3 ];
+ if ( content.isEmpty() ) {
+ // There is no letter content.
+ return LineType.EMPTY;
+ }
+ _sections.add( new Pair<>( columns[ 2 ], content ) );
+ _date = columns[ 6 ];
+ return LineType.SECTION;
+ }
+
+ /**
+ * @return true if this letter has an id, a date and at least one section.
+ */
+ private boolean hasInfo() {
+ if ( _id == null || _date == null ) {
+ return false;
+ }
+ return !_sections.isEmpty();
+ }
+
+ /**
+ * @param splits columns of a letter line.
+ * @return true if there are exactly 7 columns and neither the first nor the last is blank.
+ */
+ static private boolean isLineValid( final String[] splits ) {
+ if ( splits.length != 7 ) {
+ LOGGER.debug( "Incorrect number of columns ... skipping." );
+ return false;
+ }
+ final boolean hasTitle = !splits[ 0 ].trim()
+ .isEmpty();
+ if ( !hasTitle ) {
+ LOGGER.debug( "No Letter Title ... skipping." );
+ return false;
+ }
+ final boolean hasDate = !splits[ 6 ].trim()
+ .isEmpty();
+ if ( !hasDate ) {
+ LOGGER.debug( "No Letter Date ... skipping." );
+ }
+ return hasDate;
+ }
+
+ }
+
+
+}
Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/GenericRunnerGui.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/GenericRunnerGui.java?rev=1905443&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/GenericRunnerGui.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/GenericRunnerGui.java Mon Nov 21 17:50:20 2022
@@ -0,0 +1,62 @@
+package org.apache.ctakes.gui.generic;
+
+import org.apache.ctakes.gui.component.DisablerPane;
+import org.apache.log4j.Logger;
+
+import javax.swing.*;
+import java.awt.*;
+
+/**
+ * Can run a simple command line.
+ * Shows a frame with a MainPanel, which is given the program arguments to read its parameter file.
+ *
+ * @author SPF , chip-nlp
+ * @since {9/20/2022}
+ */
+final public class GenericRunnerGui {
+
+ static private final Logger LOGGER = Logger.getLogger( "GenericRunnerGui" );
+
+ /**
+ * @return a frame that exits the application on close, sized to 1024 x 768 with that as its minimum.
+ */
+ static private JFrame createFrame() {
+ final JFrame frame = new JFrame( "cTAKES Simple Program Frame" );
+ frame.setDefaultCloseOperation( WindowConstants.EXIT_ON_CLOSE );
+ // Use 1024 x 768 as the minimum required resolution (XGA)
+ // iPhone 3 : 480 x 320 (3:2, HVGA)
+ // iPhone 4 : 960 x 640 (3:2, unique to Apple)
+ // iPhone 5 : 1136 x 640 (under 16:9, unique to Apple)
+ // iPad 3&4 : 2048 x 1536 (4:3, QXGA)
+ // iPad Mini: 1024 x 768 (4:3, XGA)
+ final Dimension size = new Dimension( 1024, 768 );
+ frame.setSize( size );
+ frame.setMinimumSize( size );
+ return frame;
+ }
+
+ /**
+ * Sets the look and feel, then builds and shows the frame. Must be run on the event dispatch thread.
+ *
+ * @param args program arguments, handed to the main panel to read its parameter file.
+ */
+ static private void createAndShowGui( final String... args ) {
+ try {
+ UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() );
+ UIManager.getDefaults()
+ .put( "SplitPane.border", BorderFactory.createEmptyBorder() );
+ // Needed for MacOS, which sets gridlines to white by default
+ UIManager.getDefaults()
+ .put( "Table.gridColor", Color.GRAY );
+ } catch ( ClassNotFoundException | InstantiationException
+ | IllegalAccessException | UnsupportedLookAndFeelException multE ) {
+ // Pass the exception itself so that the stack trace is logged, not only the message.
+ LOGGER.error( multE.getLocalizedMessage(), multE );
+ }
+ final JFrame frame = createFrame();
+ final MainPanel mainPanel = new MainPanel();
+ frame.add( mainPanel );
+ frame.pack();
+ frame.setVisible( true );
+ DisablerPane.getInstance()
+ .initialize( frame );
+ mainPanel.readParameterFile( args );
+ LOGGER.info( "To start, click the Green Circular button above." );
+ LOGGER.info( "To stop, click the Red X button above." );
+ }
+
+
+ public static void main( final String... args ) {
+ // Set before any Swing component is created so that the property can take effect.
+ // It was previously set only after the frame had already been constructed.
+ System.setProperty( "apple.laf.useScreenMenuBar", "true" );
+ // Swing components must be created and shown on the event dispatch thread.
+ SwingUtilities.invokeLater( () -> createAndShowGui( args ) );
+ }
+
+
+}
Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/MainPanel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/MainPanel.java?rev=1905443&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/MainPanel.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/MainPanel.java Mon Nov 21 17:50:20 2022
@@ -0,0 +1,217 @@
+package org.apache.ctakes.gui.generic;
+
+import org.apache.ctakes.core.util.external.SystemUtil;
+import org.apache.ctakes.gui.component.LoggerPanel;
+import org.apache.ctakes.gui.util.IconLoader;
+import org.apache.log4j.Logger;
+
+import javax.swing.*;
+import javax.swing.border.EmptyBorder;
+import java.awt.*;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+/**
+ * @author SPF , chip-nlp
+ * @since {9/20/2022}
+ */
+public class MainPanel extends JPanel {
+
+ static private final Logger LOGGER = Logger.getLogger( "MainPanel" );
+
+ private JButton _runButton;
+ private JButton _stopButton;
+
+ MainPanel() {
+ super( new BorderLayout() );
+ setBorder( new EmptyBorder( 2, 2, 2, 2 ) );
+ add( createToolBar(), BorderLayout.NORTH );
+ add( LoggerPanel.createLoggerPanel(), BorderLayout.CENTER );
+ SwingUtilities.invokeLater( new ButtonIconLoader() );
+ }
+
+ /**
+ * Reads the parameter file and wires the start and stop buttons to the commands found in it.
+ * Empty lines and lines starting with "//" are skipped. The remaining lines are taken in order as
+ * application title, start command, starting directory and stop command. Further lines are ignored.
+ * The buttons are created disabled, so each one is enabled here once it has a command to run.
+ *
+ * @param args program arguments. Exactly one is expected: the path to the parameter file.
+ */
+ public void readParameterFile( final String... args ) {
+ if ( args.length != 1 ) {
+ logBadArgs( args );
+ return;
+ }
+ final File parmFile = new File( args[ 0 ] );
+ if ( !parmFile.canRead() ) {
+ LOGGER.error( "Cannot read parameter file: " + args[ 0 ] );
+ LOGGER.info( "Please exit the application" );
+ return;
+ }
+ String name = "";
+ String startCommand = "";
+ String directory = "";
+ String stopCommand = "";
+ try ( BufferedReader reader = new BufferedReader( new FileReader( parmFile ) ) ) {
+ String line = reader.readLine();
+ while ( line != null ) {
+ if ( !line.isEmpty() && !line.startsWith( "//" ) ) {
+ if ( name.isEmpty() ) {
+ name = line;
+ } else if ( startCommand.isEmpty() ) {
+ startCommand = line;
+ } else if ( directory.isEmpty() ) {
+ directory = line;
+ } else if ( stopCommand.isEmpty() ) {
+ stopCommand = line;
+ } else {
+ LOGGER.warn( "Ignoring extra line: " + line );
+ }
+ }
+ line = reader.readLine();
+ }
+ } catch ( IOException ioE ) {
+ LOGGER.error( ioE.getMessage() );
+ System.exit( -1 );
+ }
+ if ( startCommand.isEmpty() ) {
+ // Without a start command there is nothing that the start button could run.
+ LOGGER.error( "No Start Command in parameter file: " + args[ 0 ] );
+ LOGGER.info( "Please exit the application" );
+ return;
+ }
+ _runButton.addActionListener( new StartAction( name, startCommand, directory ) );
+ // The tool bar creates both buttons disabled and nothing else enables them.
+ // A disabled button never notifies its listeners, so enable it now that it is usable.
+ _runButton.setEnabled( true );
+ if ( !stopCommand.isEmpty() ) {
+ // The stop command is optional. The stop button stays disabled without one.
+ _stopButton.addActionListener( new StopAction( name, stopCommand, directory ) );
+ _stopButton.setEnabled( true );
+ }
+ }
+
+ static private void logBadArgs( final String... args ) {
+ if ( args.length > 1 ) {
+ LOGGER.error( "There are too many arguments in " + String.join( " ", args ) );
+ }
+ LOGGER.error( "A single argument pointing to a File containing run parameters is required." );
+ LOGGER.info( "The file format is:" );
+ LOGGER.info( "Application Title" );
+ LOGGER.info( "Start Command" );
+ LOGGER.info( "Starting Directory (optional)" );
+ LOGGER.info( "Stop Command (optional)" );
+ LOGGER.info( "Please exit the application" );
+ }
+
+
+ /**
+ * Creates the tool bar and its start and stop buttons. Both buttons start out disabled.
+ *
+ * @return a fixed tool bar holding the two buttons with separators between and around them.
+ */
+ private JToolBar createToolBar() {
+ final JToolBar bar = new JToolBar();
+ bar.setFloatable( false );
+ bar.setRollover( true );
+ bar.addSeparator( new Dimension( 10, 0 ) );
+
+ final JButton startButton = addButton( bar, "Start " );
+ startButton.setEnabled( false );
+ _runButton = startButton;
+ bar.addSeparator( new Dimension( 50, 0 ) );
+
+ final JButton stopButton = addButton( bar, "Stop " );
+ stopButton.setEnabled( false );
+ _stopButton = stopButton;
+ bar.addSeparator( new Dimension( 50, 0 ) );
+
+ bar.addSeparator( new Dimension( 10, 0 ) );
+ return bar;
+ }
+
+ /**
+ * Creates a button, adds it to the tool bar and adds a narrow separator after it.
+ *
+ * @param toolBar tool bar that receives the button.
+ * @param toolTip tool tip text for the button.
+ * @return the new button.
+ */
+ static private JButton addButton( final JToolBar toolBar, final String toolTip ) {
+ final JButton newButton = new JButton();
+ newButton.setToolTipText( toolTip );
+ // The focus indicator is not painted. Making the button unfocusable was considered but left disabled:
+ // prevents first button from having a painted border
+// button.setFocusable( false );
+ newButton.setFocusPainted( false );
+ toolBar.add( newButton );
+ final Dimension gap = new Dimension( 10, 0 );
+ toolBar.addSeparator( gap );
+ return newButton;
+ }
+
+
+ /**
+ * Runs the start command when the start button is used.
+ */
+ private final class StartAction implements ActionListener {
+
+ private final String _name;
+ private final String _command;
+ private final String _dir;
+
+ private StartAction( final String name,
+ final String command,
+ final String dir ) {
+ _name = name;
+ _command = command;
+ _dir = dir;
+ }
+
+ @Override
+ public void actionPerformed( final ActionEvent event ) {
+ if ( _runButton != null ) {
+ runStartCommand();
+ }
+ }
+
+ /**
+ * Configures a command runner for the start command and runs it. An IOException is logged.
+ */
+ private void runStartCommand() {
+ final SystemUtil.CommandRunner runner = new SystemUtil.CommandRunner( _command );
+ runner.setLogger( LOGGER );
+ runner.wait( true );
+ // The starting directory is optional.
+ final boolean hasDirectory = _dir != null && !_dir.isEmpty();
+ if ( hasDirectory ) {
+ runner.setDirectory( _dir );
+ }
+ LOGGER.info( "Starting " + _name + " ..." );
+ try {
+ SystemUtil.run( runner );
+ } catch ( IOException ioE ) {
+ LOGGER.error( ioE.getMessage() );
+ }
+ }
+
+ }
+
+
+ /**
+ * Runs the stop command when the stop button is used.
+ * The stop command is optional in the parameter file, so nothing is run when it is empty.
+ */
+ private final class StopAction implements ActionListener {
+
+ private final String _name;
+ private final String _command;
+ private final String _dir;
+
+ private StopAction( final String name,
+ final String command,
+ final String dir ) {
+ _name = name;
+ _command = command;
+ _dir = dir;
+ }
+
+ @Override
+ public void actionPerformed( final ActionEvent event ) {
+ // Guard on the button that this action serves. The start button was checked here before.
+ if ( _stopButton == null ) {
+ return;
+ }
+ if ( _command == null || _command.isEmpty() ) {
+ // No stop command was given, so there is no command line to run.
+ LOGGER.warn( "No Stop Command was specified for " + _name );
+ return;
+ }
+ final SystemUtil.CommandRunner runner = new SystemUtil.CommandRunner( _command );
+ runner.setLogger( LOGGER );
+ runner.wait( true );
+ if ( _dir != null && !_dir.isEmpty() ) {
+ runner.setDirectory( _dir );
+ }
+ LOGGER.info( "Stopping " + _name + " ..." );
+ try {
+ SystemUtil.run( runner );
+ } catch ( IOException ioE ) {
+ LOGGER.error( ioE.getMessage() );
+ }
+ }
+
+ }
+
+
+ /**
+ * Runnable that loads the icons for the start and stop buttons and sets them on the buttons.
+ * <p>
+ * Some icons
+ * <a href="https://www.freepik.com/free-vector/no-entry-hand-sign-isolated-white_10601278.htm#query=stop%20hand&position=1&from_view=keyword">Image by macrovector</a> on Freepik
+ */
+ private final class ButtonIconLoader implements Runnable {
+
+ @Override
+ public void run() {
+ final String iconDir = "org/apache/ctakes/gui/pipeline/icon/";
+ // Both icons are loaded before either button is changed.
+ final Icon startIcon = IconLoader.loadIcon( iconDir + "RunPiper.png" );
+ final Icon haltIcon = IconLoader.loadIcon( iconDir + "StopHand.png" );
+ _runButton.setIcon( startIcon );
+ _stopButton.setIcon( haltIcon );
+ }
+
+ }
+
+
+}
Added: ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt?rev=1905443&view=auto
==============================================================================
--- ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt (added)
+++ ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt Mon Nov 21 17:50:20 2022
@@ -0,0 +1,2 @@
+Consider the note files in this directory deprecated.
+Example clinical notes are now in the ctakes-examples-res project.
Added: ctakes/trunk/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifier.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifier.java?rev=1905443&view=auto
==============================================================================
--- ctakes/trunk/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifier.java (added)
+++ ctakes/trunk/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifier.java Mon Nov 21 17:50:20 2022
@@ -0,0 +1,265 @@
+package org.apache.ctakes.smokingstatus.ae;
+
+import libsvm.svm;
+import libsvm.svm_model;
+import libsvm.svm_node;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.util.log.DotLogger;
+import org.apache.ctakes.smokingstatus.type.libsvm.NominalAttributeValue;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.*;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import static org.apache.ctakes.smokingstatus.Const.*;
+
+
+/**
+ * Update of original PcsClassifierAnnotator_libsvm to use UimaFit.
+ *
+ * @author SPF , chip-nlp
+ * @since {6/3/2022}
+ */
+@PipeBitInfo(
+ name = "PcsClassifier",
+ description = "Uses SVM for smoking status classification.",
+ role = PipeBitInfo.Role.ANNOTATOR
+)
+public class PcsClassifier extends JCasAnnotator_ImplBase {
+
+ static private final Logger LOGGER = Logger.getLogger( "PcsClassifier" );
+
+ static public final String CASED_PARAM = "CaseSensitive";
+ static public final String CASED_DESC = "yes/no for case sensitivity.";
+ @ConfigurationParameter(
+ name = CASED_PARAM,
+ description = CASED_DESC,
+ mandatory = false,
+ defaultValue = "yes"
+ )
+ private String _caseSensitive;
+
+ static public final String STOP_WORDS_PARAM = "StopWordsPath";
+ static public final String STOP_WORDS_DESC = "Path to file containing stop words.";
+ @ConfigurationParameter(
+ name = STOP_WORDS_PARAM,
+ description = STOP_WORDS_DESC
+ )
+ private String _stopWordsPath;
+
+ static public final String KEY_WORDS_PARAM = "KeyWordsPath";
+ static public final String KEY_WORDS_DESC = "Path to file containing key words.";
+ @ConfigurationParameter(
+ name = KEY_WORDS_PARAM,
+ description = KEY_WORDS_DESC
+ )
+ private String _keyWordsPath;
+
+ static public final String MODEL_PARAM = "ModelPath";
+ static public final String MODEL_DESC = "Path to file containing the model.";
+ @ConfigurationParameter(
+ name = MODEL_PARAM,
+ description = MODEL_DESC
+ )
+ private String _modelPath;
+
+ static private final Map<Integer, String> SMOKER_CODES = new HashMap<>();
+
+ static private final Pattern SPACE_PATTERN = Pattern.compile( "\\s+" );
+ static private final Pattern TEXT_CLEANER_PATTERN = Pattern.compile( "[.?!:;()',\"{}<>#+]" );
+ static private final String[] DATE_REGEXES = {
+ "19\\d\\d", "19\\d\\ds", "20\\d\\d", "20\\d\\ds", "[1-9]0s", "\\d{1,2}[/-]\\d{1,2}",
+ "\\d{1,2}[/-]\\d{4}", "\\d{1,2}[/-]\\d{1,2}[/-]\\d{2}", "\\d{1,2}[/-]\\d{1,2}[/-]\\d{4}" };
+
+ static private final Collection<Pattern> DATE_PATTERNS = new ArrayList<>();
+
+ static {
+ for ( String regex : DATE_REGEXES ) {
+ DATE_PATTERNS.add( Pattern.compile( regex ) );
+ }
+ SMOKER_CODES.put( CLASS_CURR_SMOKER_INT, CLASS_CURR_SMOKER );
+ SMOKER_CODES.put( CLASS_PAST_SMOKER_INT, CLASS_PAST_SMOKER );
+ SMOKER_CODES.put( CLASS_SMOKER_INT, CLASS_SMOKER );
+ }
+
+ private boolean _isCaseSensitive = true;
+ private final Collection<String> _stopWords = new HashSet<>();
+ private final List<String> _keyWords = new ArrayList<>();
+ // Trained lib_svm model.
+ private svm_model _model;
+
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Resolves the case sensitivity setting, loads the stop words and key words, and loads the svm model.
+ *
+ * @throws ResourceInitializationException if a word file cannot be read or the model cannot be loaded.
+ */
+ @Override
+ public void initialize( final UimaContext context ) throws ResourceInitializationException {
+ super.initialize( context );
+ LOGGER.info( "Initializing ..." );
+ try ( DotLogger dotter = new DotLogger() ) {
+ // run long initialization process. Caught Exception may be of some other type.
+ // The literal is the receiver so that an unset parameter cannot cause a NullPointerException.
+ if ( "no".equalsIgnoreCase( _caseSensitive )
+ || "false".equalsIgnoreCase( _caseSensitive ) ) {
+ _isCaseSensitive = false;
+ }
+ parseFile( _stopWordsPath, _isCaseSensitive, _stopWords );
+ parseFile( _keyWordsPath, _isCaseSensitive, _keyWords );
+ _model = svm.svm_load_model( FileLocator.getFile( _modelPath )
+ .getPath() );
+ if ( _model == null ) {
+ // Fail here with the path, not later in process(..) with a NullPointerException.
+ throw new IOException( "Could not load svm model from " + _modelPath );
+ }
+ } catch ( IOException ioE ) {
+ throw new ResourceInitializationException( ioE );
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Builds a feature vector from the key word features plus one feature that flags the presence of a
+ * date-like token in the document text, has the svm model predict a class label, and stores the
+ * smoker code mapped to that label in a "smoking_status" NominalAttributeValue.
+ *
+ * @throws AnalysisEngineProcessException if an IOException is thrown while processing.
+ */
+ @Override
+ public void process( final JCas jcas ) throws AnalysisEngineProcessException {
+ LOGGER.info( "Processing ..." );
+ try ( DotLogger dotter = new DotLogger() ) {
+ final List<Double> features = createFeatures( jcas );
+ // Cannot access sentence by SentenceAnnotator or RecordSentence. The document text is the sentence.
+ // A cas without document text is handled as empty text instead of a NullPointerException.
+ final String docText = jcas.getDocumentText();
+ final String sentence = TEXT_CLEANER_PATTERN.matcher( docText == null ? "" : docText )
+ .replaceAll( " " )
+ .trim();
+ // date information: 1.0 if any token of the text fully matches one of the date patterns.
+ double dateInfo = 0.0;
+ final String[] textTokens = SPACE_PATTERN.split( sentence );
+ for ( String textToken : textTokens ) {
+ if ( DATE_PATTERNS.stream()
+ .anyMatch( p -> p.matcher( textToken )
+ .matches() ) ) {
+ dateInfo = 1.0;
+ LOGGER.info( "***dateInfo|" + textToken + "|" + dateInfo );
+ break;
+ }
+ }
+ features.add( dateInfo );
+ // set the libSVM feature vector. Node indices are 1-based.
+ final svm_node[] svm_nodes = new svm_node[ features.size() ];
+ for ( int j = 0; j < features.size(); j++ ) {
+ svm_nodes[ j ] = new svm_node();
+ svm_nodes[ j ].index = j + 1;
+ svm_nodes[ j ].value = features.get( j );
+ }
+ // 1:CURRENT_SMOKER, 2:PAST_SMOKER, 3:SMOKER
+ final double classLabel = svm.svm_predict( _model, svm_nodes );
+ // string value.
+ // note that the original code would cast to integer, which is equivalent to floor but poor form.
+ final int intClassLabel = Double.valueOf( classLabel )
+ .intValue();
+ final String classValue = SMOKER_CODES.get( intClassLabel );
+ LOGGER.info( "classLabel=" + classLabel + " intClassLabel=" + intClassLabel + " classValue=" + classValue );
+ final NominalAttributeValue nominalAttributeValue = new NominalAttributeValue( jcas );
+ nominalAttributeValue.setAttributeName( "smoking_status" );
+ nominalAttributeValue.setNominalValue( classValue );
+ nominalAttributeValue.addToIndexes();
+ } catch ( IOException ioE ) {
+ throw new AnalysisEngineProcessException( ioE );
+ }
+ }
+
+ private List<Double> createFeatures( final JCas jcas ) {
+ final List<Double> features = new ArrayList<>();
+ final List<String> unigrams = createUnigrams( jcas );
+ final List<String> bigrams = new ArrayList<>();
+ for ( int i = 0; i < unigrams.size() - 1; i++ ) {
+ bigrams.add( unigrams.get( i ) + "_" + unigrams.get( i + 1 ) );
+ }
+ // unigram & bigram keywords
+ for ( String keyWord : _keyWords ) {
+ double value = 0.0;
+ if ( keyWord.contains( "_" ) ) {
+ if ( bigrams.stream()
+ .anyMatch( keyWord::equalsIgnoreCase ) ) {
+ value = 1.0;
+ LOGGER.info( "keyWord=" + keyWord + " bigram=" + bigrams.stream()
+ .filter( keyWord::equalsIgnoreCase )
+ .collect(
+ Collectors.joining( " ; " ) ) );
+ }
+ } else {
+ if ( unigrams.stream()
+ .anyMatch( keyWord::equalsIgnoreCase ) ) {
+ value = 1.0;
+ LOGGER.info( "keyWord=" + keyWord + " unigram=" + unigrams.stream()
+ .filter( keyWord::equalsIgnoreCase )
+ .collect(
+ Collectors.joining( " ; " ) ) );
+ }
+ }
+ features.add( value );
+ }
+ return features;
+ }
+
+ /**
+ * Creates lower case unigrams from the word tokens of the document, leaving out stop words.
+ *
+ * @param jcas ye olde ...
+ * @return unigrams in the order in which the word tokens are selected.
+ */
+ private List<String> createUnigrams( final JCas jcas ) {
+ final List<String> unigrams = new ArrayList<>();
+ final Collection<WordToken> wordTokens = JCasUtil.select( jcas, WordToken.class );
+ for ( WordToken token : wordTokens ) {
+ String tokenText = token.getCoveredText();
+ if ( tokenText == null || tokenText.isEmpty() ) {
+ continue;
+ }
+ // TODO - Token text is ALWAYS turned to lower case here, while the stop words and key words
+ // are only turned to lower case when the annotator is not case sensitive.
+ // Since the model was trained on words without non-word characters, runs of two or more
+ // dashes are replaced by a space to deal with the cases like: Tobacco--quit in 1980.
+ // Locale.ROOT keeps the result independent of the default locale of the machine.
+ tokenText = tokenText.toLowerCase( Locale.ROOT )
+ .replaceAll( "-{2,}", " " )
+ .trim();
+ Arrays.stream( SPACE_PATTERN.split( tokenText ) )
+ .filter( t -> !_stopWords.contains( t ) )
+ .forEach( unigrams::add );
+ }
+ return unigrams;
+ }
+
+ /**
+ * Adds every line of a file to a collection.
+ *
+ * @param filePath path to the file, resolved by the FileLocator.
+ * @param isCaseSensitive false to turn each line to lower case before it is added.
+ * @param collection collection to which the lines are added.
+ * @throws IOException if the file cannot be read. The original exception is kept as the cause.
+ */
+ static private void parseFile( final String filePath,
+ final boolean isCaseSensitive,
+ final Collection<String> collection ) throws IOException {
+ try ( BufferedReader reader
+ = new BufferedReader(
+ new InputStreamReader(
+ FileLocator.getAsStream( filePath ) ) ) ) {
+ String line = reader.readLine();
+ while ( line != null ) {
+ if ( !isCaseSensitive ) {
+ // Locale.ROOT keeps the result independent of the default locale of the machine.
+ line = line.toLowerCase( Locale.ROOT );
+ }
+ collection.add( line );
+ line = reader.readLine();
+ }
+ } catch ( IOException ioE ) {
+ throw new IOException( "Couldn't read " + filePath + " " + ioE.getMessage(), ioE );
+ }
+ }
+
+
+}
Modified: ctakes/trunk/ctakes-ytex-web/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-ytex-web/pom.xml?rev=1905443&r1=1905442&r2=1905443&view=diff
==============================================================================
--- ctakes/trunk/ctakes-ytex-web/pom.xml (original)
+++ ctakes/trunk/ctakes-ytex-web/pom.xml Mon Nov 21 17:50:20 2022
@@ -132,32 +132,63 @@
<build>
<!-- dirty hack to get resources into the classpath (because the *-res
dependencies are empty) -->
- <resources>
- <resource>
- <directory>${project.basedir}/../ctakes-ytex-res/src/main/resources</directory>
- </resource>
- <resource>
- <directory>
- ${project.basedir}/../ctakes-ytex/target/classes
- </directory>
- <excludes>
- <exclude>**/*.class</exclude>
- </excludes>
- </resource>
- </resources>
+ <!-- REMOVED 04/28/2022 in favor of maven-resources-plugin method below. SPF -->
+ <!-- <resources>-->
+ <!-- <resource>-->
+ <!-- <directory>${project.basedir}/../ctakes-ytex-res/src/main/resources</directory>-->
+ <!-- </resource>-->
+ <!-- <resource>-->
+ <!-- <directory>-->
+ <!-- ${project.basedir}/../ctakes-ytex/target/classes-->
+ <!-- </directory>-->
+ <!-- <excludes>-->
+ <!-- <exclude>**/*.class</exclude>-->
+ <!-- </excludes>-->
+ <!-- </resource>-->
+ <!-- </resources>-->
<!-- dirty hack to get test resources into the classpath (because the *-res
dependencies are empty) -->
- <testResources>
- <testResource>
- <directory>
- ${project.basedir}/../ctakes-ytex/target/test-classes
- </directory>
- <excludes>
- <exclude>**/*.class</exclude>
- </excludes>
- </testResource>
- </testResources>
+ <!-- <testResources>-->
+ <!-- <testResource>-->
+ <!-- <directory>-->
+ <!-- ${project.basedir}/../ctakes-ytex/target/test-classes-->
+ <!-- </directory>-->
+ <!-- <excludes>-->
+ <!-- <exclude>**/*.class</exclude>-->
+ <!-- </excludes>-->
+ <!-- </testResource>-->
+ <!-- </testResources>-->
<plugins>
+
+ <!-- ctakes-ytex-res is a separate module with its own code repo.
+ ytex-web wants its resources. There is a dirty hack above, but below is a
+ different method that should produce a usable result.
+ Using the plugin instead of redirecting resources allows maven to appropriately
+ build a classpath. -->
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>3.0.2</version>
+ <executions>
+ <execution>
+ <id>copy-resources</id>
+ <phase>compile</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>${basedir}/target/classes</outputDirectory>
+ <resources>
+ <resource>
+ <directory>${basedir}/../ctakes-ytex-res/src/main/resources</directory>
+ <filtering>true</filtering>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+
<plugin>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-maven-plugin</artifactId>