You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2022/11/21 17:50:20 UTC

svn commit: r1905443 [2/2] - in /ctakes/trunk: ./ ctakes-assertion/ ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/ ctakes-core/desc/analysis_engine/ ctakes-core/src/main/java/org/apache/ctakes/core/util/external/ ctakes-dictionary-...

Added: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/cr/LetterColumnReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/cr/LetterColumnReader.java?rev=1905443&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/cr/LetterColumnReader.java (added)
+++ ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/cr/LetterColumnReader.java Mon Nov 21 17:50:20 2022
@@ -0,0 +1,279 @@
+package org.apache.ctakes.examples.cr;
+
+import org.apache.ctakes.core.cr.AbstractFileTreeReader;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.pipeline.ProgressManager;
+import org.apache.ctakes.core.util.Pair;
+import org.apache.ctakes.core.util.StringUtil;
+import org.apache.ctakes.core.util.doc.JCasBuilder;
+import org.apache.ctakes.core.util.doc.TextBySectionBuilder;
+import org.apache.ctakes.core.util.regex.RegexSpanFinder;
+import org.apache.log4j.Logger;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+@PipeBitInfo(
+      name = "LetterColumnReader",
+      description = "Build Patient document text from columnar Letter text.",
+      role = PipeBitInfo.Role.READER
+)
+public class LetterColumnReader extends AbstractFileTreeReader {
+
+   static private final Logger LOGGER = Logger.getLogger( "LetterColumnReader" );
+
+   static private final Pattern LETTER_PATTERN = Pattern.compile( "\\bLetter [0-9]+\\|" );
+
+   private String _fileId = "";
+   private int _letterTotal = 0;
+   private int _letterCount = 0;
+   private final List<Letter> _fileLetters = new ArrayList<>();
+   private int _fileLetterIndex = 0;
+
+   private JCasBuilder _jCasBuilder = new JCasBuilder();
+
+
+   /**
+    * Gets the number of letters discovered so far.
+    * The count is increased each time a file is read, so it is only the true total
+    * once every file has been parsed.
+    *
+    * @return the number of letters found in the files read so far.
+    */
+   @Override
+   public int getNoteCount() {
+      return _letterTotal;
+   }
+
+   /**
+    * {@inheritDoc}
+    * There is a next letter if the current file still has undelivered letters
+    * or if there are files that have not been read yet.
+    */
+   @Override
+   public boolean hasNext() {
+      final boolean lettersPending = _fileLetterIndex < _fileLetters.size();
+      if ( lettersPending ) {
+         return true;
+      }
+      if ( getCurrentIndex() < getFiles().size() ) {
+         return true;
+      }
+      // Nothing is left to read, so report the final letter count as the progress.
+      ProgressManager.getInstance()
+                     .updateProgress( _letterTotal );
+      return false;
+   }
+
+   /**
+    * Populates the jcas with the next letter.
+    * Letters parsed from the current file are delivered first.  When they run out,
+    * the following files are read until one of them yields at least one letter.
+    *
+    * @param jcas cas to populate with the next letter.
+    * @throws IOException         if a file cannot be read, or if none of the remaining files contains a letter.
+    * @throws CollectionException declared by the overridden method.
+    */
+   @Override
+   public void getNext( final JCas jcas ) throws IOException, CollectionException {
+      // Iterate instead of recursing so that a run of files without letters cannot deepen the stack.
+      while ( _fileLetterIndex >= _fileLetters.size() ) {
+         final int currentFileIndex = getCurrentIndex();
+         if ( currentFileIndex >= getFiles().size() ) {
+            // hasNext() only knows that a file remained, not that the file held any letter.
+            throw new IOException( "No letter is available: the last file(s) read contained no valid letters." );
+         }
+         final File file = getFiles().get( currentFileIndex );
+         setCurrentIndex( currentFileIndex + 1 );
+         _fileId = createDocumentID( file, getValidExtensions() );
+         readFile( jcas, file );
+      }
+      final Letter letter = _fileLetters.get( _fileLetterIndex );
+      _fileLetterIndex++;
+      _letterCount++;
+      ProgressManager.getInstance()
+                     .updateProgress( _letterCount );
+      // The document id combines the file id and the letter id.
+      _jCasBuilder.setDocId( _fileId + "_" + letter._id )
+                  .setDocTime( letter._date )
+                  .rebuild( jcas );
+      final TextBySectionBuilder builder = new TextBySectionBuilder();
+      letter._sections
+            .forEach( p -> builder.addSection( p.getValue1(), p.getValue2() ) );
+      builder.populate( jcas );
+   }
+
+   /**
+    * {@inheritDoc}
+    * Progress is reported as letters delivered versus letters discovered so far.
+    */
+   @Override
+   public Progress[] getProgress() {
+      final Progress letterProgress = new ProgressImpl( _letterCount, _letterTotal, Progress.ENTITIES );
+      return new Progress[]{ letterProgress };
+   }
+
+
+   /**
+    * Parses the letters in a file and makes them the current letters of this reader.
+    * The letter total, the jcas builder and the progress are updated; the jcas itself is not touched here.
+    *
+    * @param jCas not used by this method.
+    * @param file file to be read.
+    * @throws IOException should anything bad happen.
+    */
+   protected void readFile( JCas jCas, File file ) throws IOException {
+      LOGGER.info( "Reading File " + file.getPath() );
+      final String fileText = readByBuffer( file );
+      // Forget the letters of the previous file before parsing the new one.
+      _fileLetterIndex = 0;
+      _fileLetters.clear();
+      if ( !fileText.isEmpty() ) {
+         final List<Letter> parsedLetters = readLetters( fileText );
+         _fileLetters.addAll( parsedLetters );
+         _letterTotal += parsedLetters.size();
+      }
+      // Every letter in the file is built from the same file-based builder.
+      _jCasBuilder = getJCasBuilder( file ).setDocType( "Letter" )
+                                           .nullDocText();
+      ProgressManager.getInstance()
+                     .updateProgress( _letterCount, _letterTotal );
+      LOGGER.info( "Parsed " + _fileLetters.size() + " letters" );
+   }
+
+
+   /**
+    * Splits the raw text into blocks that each start with "Letter [number]|" and groups
+    * consecutive blocks that share an id into letters.
+    *
+    * @param rawText complete raw text as read from file.
+    * @return letters parsed from file text.  Letters without id, date or sections are left out.
+    * @throws IOException if things go wrong.
+    */
+   static private List<Letter> readLetters( final String rawText ) throws IOException {
+      final List<Integer> letterStarts;
+      try ( RegexSpanFinder finder = new RegexSpanFinder( LETTER_PATTERN ) ) {
+         letterStarts = finder.findSpans( rawText )
+                              .stream()
+                              .map( Pair::getValue1 )
+                              .collect( Collectors.toList() );
+      } catch ( IllegalArgumentException iaE ) {
+         // Keep the original exception as the cause so that the stack trace is not lost.
+         throw new IOException( "Illegal Argument " + iaE.getMessage(), iaE );
+      }
+      if ( letterStarts.isEmpty() ) {
+         return Collections.emptyList();
+      }
+      final List<Letter> letters = new ArrayList<>();
+      Letter currentLetter = new Letter();
+      for ( int i = 0; i < letterStarts.size() - 1; i++ ) {
+         final String letterLine = rawText.substring( letterStarts.get( i ), letterStarts.get( i + 1 ) );
+         final Letter newOrCurrent = handleLetterLine( currentLetter, letterLine );
+         // handleLetterLine returns a different object only when the line started a new letter.
+         // Comparing references instead of ids avoids a NullPointerException when a malformed
+         // line is offered to a letter that does not have an id yet.
+         if ( newOrCurrent != currentLetter ) {
+            if ( currentLetter.hasInfo() ) {
+               letters.add( currentLetter );
+            }
+            currentLetter = newOrCurrent;
+         }
+      }
+      // The last block runs to the end of the text.
+      final String lastLetterLine = rawText.substring( letterStarts.get( letterStarts.size() - 1 ) );
+      final Letter newOrCurrent = handleLetterLine( currentLetter, lastLetterLine );
+      if ( currentLetter.hasInfo() ) {
+         letters.add( currentLetter );
+      }
+      if ( newOrCurrent != currentLetter && newOrCurrent.hasInfo() ) {
+         letters.add( newOrCurrent );
+      }
+      return letters;
+   }
+
+
+   /**
+    * Offers a line to a letter, starting a new letter if the line belongs to a different one.
+    *
+    * @param letter the letter currently being populated with sections.
+    * @param line   a block of text representing a letter line.
+    * @return the given letter if it accepted or ignored the line, otherwise a new letter that processed the line.
+    */
+   static private Letter handleLetterLine( final Letter letter, final String line ) {
+      if ( letter.addLine( line ) == LineType.NEXT_LETTER ) {
+         // The line has a different id, so process it again with a fresh letter.
+         return handleLetterLine( new Letter(), line );
+      }
+      return letter;
+   }
+
+
+   /**
+    * Reads file using buffered input stream.
+    * All bytes are gathered before they are decoded, so a multi-byte character that
+    * straddles two buffer reads is decoded correctly.
+    *
+    * @param file file to read
+    * @return text in file, decoded with the valid encoding of this reader or, if there is none,
+    * with the platform default.
+    * @throws IOException if the file could not be read or the encoding is not supported.
+    */
+   private String readByBuffer( final File file ) throws IOException {
+      final String encoding = getValidEncoding();
+      // Use 8KB as the default buffer size
+      final byte[] buffer = new byte[ 8192 ];
+      final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+      try ( final InputStream inputStream = new BufferedInputStream( new FileInputStream( file ), buffer.length ) ) {
+         int length = inputStream.read( buffer );
+         while ( length >= 0 ) {
+            bytes.write( buffer, 0, length );
+            length = inputStream.read( buffer );
+         }
+      } catch ( FileNotFoundException fnfE ) {
+         throw new IOException( fnfE );
+      }
+      if ( encoding != null && !encoding.isEmpty() && !UNKNOWN.equals( encoding ) ) {
+         return bytes.toString( encoding );
+      }
+      return bytes.toString();
+   }
+
+
+   /**
+    * Result of offering a line of text to a {@link Letter}.
+    */
+   private enum LineType {
+      // The line failed validation and was ignored.
+      MALFORMED,
+      // The line belongs to the letter but its content column is empty.
+      EMPTY,
+      // The line was added to the letter as a section.
+      SECTION,
+      // The line has a different id than the letter, so it belongs to another letter.
+      NEXT_LETTER
+   }
+
+   /**
+    * A letter assembled from '|' separated lines of 7 columns.
+    * Column 0 is the letter id, columns 2 and 3 form a section pair and column 6 is the letter date.
+    */
+   static private final class Letter {
+
+      private String _id;
+      private String _date;
+      private final List<Pair<String>> _sections = new ArrayList<>();
+
+      /**
+       * @param line a single line of '|' separated columns.
+       * @return what was done with the line.
+       */
+      private LineType addLine( final String line ) {
+         final String[] columns = StringUtil.fastSplit( line, '|' );
+         if ( !isLineValid( columns ) ) {
+            return LineType.MALFORMED;
+         }
+         final String lineId = columns[ 0 ];
+         if ( _id == null ) {
+            // The first valid line names this letter.
+            _id = lineId;
+         } else if ( !_id.equals( lineId ) ) {
+            return LineType.NEXT_LETTER;
+         }
+         final String content = columns[ 3 ];
+         if ( content.isEmpty() ) {
+            // There is no letter content.
+            return LineType.EMPTY;
+         }
+         _sections.add( new Pair<>( columns[ 2 ], content ) );
+         _date = columns[ 6 ];
+         return LineType.SECTION;
+      }
+
+      /**
+       * @return true if this letter has an id, a date and at least one section.
+       */
+      private boolean hasInfo() {
+         if ( _id == null || _date == null ) {
+            return false;
+         }
+         return !_sections.isEmpty();
+      }
+
+      /**
+       * @param splits columns of a line.
+       * @return true if there are exactly 7 columns and neither the first nor the last is blank.
+       */
+      static private boolean isLineValid( final String[] splits ) {
+         if ( splits.length != 7 ) {
+            LOGGER.debug( "Incorrect number of columns ... skipping." );
+            return false;
+         }
+         final String title = splits[ 0 ].trim();
+         if ( title.isEmpty() ) {
+            LOGGER.debug( "No Letter Title ... skipping." );
+            return false;
+         }
+         final String date = splits[ 6 ].trim();
+         if ( date.isEmpty() ) {
+            LOGGER.debug( "No Letter Date ... skipping." );
+            return false;
+         }
+         return true;
+      }
+
+   }
+
+
+}

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/GenericRunnerGui.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/GenericRunnerGui.java?rev=1905443&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/GenericRunnerGui.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/GenericRunnerGui.java Mon Nov 21 17:50:20 2022
@@ -0,0 +1,62 @@
+package org.apache.ctakes.gui.generic;
+
+import org.apache.ctakes.gui.component.DisablerPane;
+import org.apache.log4j.Logger;
+
+import javax.swing.*;
+import java.awt.*;
+
+/**
+ * Can run a simple command line.
+ * Shows a frame with a {@link MainPanel}, which is given the command line arguments
+ * so that it can read its parameter file.
+ *
+ * @author SPF , chip-nlp
+ * @since {9/20/2022}
+ */
+final public class GenericRunnerGui {
+
+   static private final Logger LOGGER = Logger.getLogger( "GenericRunnerGui" );
+
+   /**
+    * @return a frame of size 1024 x 768 that exits the application when it is closed.
+    */
+   static private JFrame createFrame() {
+      final JFrame frame = new JFrame( "cTAKES Simple Program Frame" );
+      frame.setDefaultCloseOperation( WindowConstants.EXIT_ON_CLOSE );
+      // Use 1024 x 768 as the minimum required resolution (XGA)
+      // iPhone 3 : 480 x 320 (3:2, HVGA)
+      // iPhone 4 : 960 x 640  (3:2, unique to Apple)
+      // iPhone 5 : 1136 x 640 (under 16:9, unique to Apple)
+      // iPad 3&4 : 2048 x 1536 (4:3, QXGA)
+      // iPad Mini: 1024 x 768 (4:3, XGA)
+      final Dimension size = new Dimension( 1024, 768 );
+      frame.setSize( size );
+      frame.setMinimumSize( size );
+      System.setProperty( "apple.laf.useScreenMenuBar", "true" );
+      return frame;
+   }
+
+
+   /**
+    * Sets the system look and feel, shows the frame and hands the arguments to the main panel.
+    *
+    * @param args a single argument pointing to the parameter file is expected by the main panel.
+    */
+   public static void main( final String... args ) {
+      try {
+         UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() );
+         UIManager.getDefaults()
+                  .put( "SplitPane.border", BorderFactory.createEmptyBorder() );
+         // Needed for MacOS, which sets gridlines to white by default
+         UIManager.getDefaults()
+                  .put( "Table.gridColor", Color.GRAY );
+      } catch ( ClassNotFoundException | InstantiationException
+            | IllegalAccessException | UnsupportedLookAndFeelException multE ) {
+         // Not fatal: the default look and feel is used.  Log the throwable so the stack trace is kept.
+         LOGGER.error( multE.getLocalizedMessage(), multE );
+      }
+      final JFrame frame = createFrame();
+      final MainPanel mainPanel = new MainPanel();
+      frame.add( mainPanel );
+      frame.pack();
+      frame.setVisible( true );
+      DisablerPane.getInstance()
+                  .initialize( frame );
+      mainPanel.readParameterFile( args );
+      LOGGER.info( "To start, click the Green Circular button above." );
+      LOGGER.info( "To stop, click the Red X button above." );
+   }
+
+
+}

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/MainPanel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/MainPanel.java?rev=1905443&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/MainPanel.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/generic/MainPanel.java Mon Nov 21 17:50:20 2022
@@ -0,0 +1,217 @@
+package org.apache.ctakes.gui.generic;
+
+import org.apache.ctakes.core.util.external.SystemUtil;
+import org.apache.ctakes.gui.component.LoggerPanel;
+import org.apache.ctakes.gui.util.IconLoader;
+import org.apache.log4j.Logger;
+
+import javax.swing.*;
+import javax.swing.border.EmptyBorder;
+import java.awt.*;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+/**
+ * @author SPF , chip-nlp
+ * @since {9/20/2022}
+ */
+public class MainPanel extends JPanel {
+
+   static private final Logger LOGGER = Logger.getLogger( "MainPanel" );
+
+   private JButton _runButton;
+   private JButton _stopButton;
+
+   /**
+    * Places the tool bar above the logger panel and queues the loading of the button icons.
+    */
+   MainPanel() {
+      super( new BorderLayout() );
+      setBorder( new EmptyBorder( 2, 2, 2, 2 ) );
+      add( createToolBar(), BorderLayout.NORTH );
+      add( LoggerPanel.createLoggerPanel(), BorderLayout.CENTER );
+      // Queued after createToolBar() has created the buttons that receive the icons.
+      SwingUtilities.invokeLater( new ButtonIconLoader() );
+   }
+
+   /**
+    * Reads the run parameters from the file named by the single argument and wires them to the buttons.
+    * Empty lines and lines starting with "//" are skipped.  The remaining lines are taken, in order, as:
+    * application title, start command, starting directory, stop command.  Further lines are ignored.
+    * Lines are positional, so a stop command can only be given after a starting directory line.
+    *
+    * @param args command line arguments; exactly one, the path to the parameter file, is expected.
+    */
+   public void readParameterFile( final String... args ) {
+      if ( args.length != 1 ) {
+         logBadArgs( args );
+         return;
+      }
+      final File parmFile = new File( args[ 0 ] );
+      if ( !parmFile.canRead() ) {
+         LOGGER.error( "Cannot read parameter file: " + args[ 0 ] );
+         LOGGER.info( "Please exit the application" );
+         return;
+      }
+      String name = "";
+      String startCommand = "";
+      String directory = "";
+      String stopCommand = "";
+      try ( BufferedReader reader = new BufferedReader( new FileReader( args[ 0 ] ) ) ) {
+         String line = reader.readLine();
+         while ( line != null ) {
+            if ( !line.isEmpty() && !line.startsWith( "//" ) ) {
+               // Fill the first parameter that is still empty.
+               if ( name.isEmpty() ) {
+                  name = line;
+               } else if ( startCommand.isEmpty() ) {
+                  startCommand = line;
+               } else if ( directory.isEmpty() ) {
+                  directory = line;
+               } else if ( stopCommand.isEmpty() ) {
+                  stopCommand = line;
+               } else {
+                  LOGGER.warn( "Ignoring extra line: " + line );
+               }
+            }
+            line = reader.readLine();
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+         System.exit( -1 );
+      }
+      _runButton.addActionListener( new StartAction( name, startCommand, directory ) );
+      _stopButton.addActionListener( new StopAction( name, stopCommand, directory ) );
+      // The buttons are created disabled.  Unless they are enabled here the listeners can never fire.
+      // A button without a command stays disabled.
+      _runButton.setEnabled( !startCommand.isEmpty() );
+      _stopButton.setEnabled( !stopCommand.isEmpty() );
+   }
+
+   static private void logBadArgs( final String... args ) {
+      if ( args.length > 1 ) {
+         LOGGER.error( "There are too many arguments in " + String.join( " ", args ) );
+      }
+      LOGGER.error( "A single argument pointing to a File containing run parameters is required." );
+      LOGGER.info( "The file format is:" );
+      LOGGER.info( "Application Title" );
+      LOGGER.info( "Start Command" );
+      LOGGER.info( "Starting Directory (optional)" );
+      LOGGER.info( "Stop Command (optional)" );
+      LOGGER.info( "Please exit the application" );
+   }
+
+
+   /**
+    * Creates the tool bar and, as a side effect, the run and stop buttons.
+    * Both buttons start out disabled.
+    *
+    * @return a fixed tool bar holding the run button and the stop button.
+    */
+   private JToolBar createToolBar() {
+      final JToolBar toolBar = new JToolBar();
+      toolBar.setFloatable( false );
+      toolBar.setRollover( true );
+      toolBar.addSeparator( new Dimension( 10, 0 ) );
+      _runButton = addButton( toolBar, "Start " );
+      _runButton.setEnabled( false );
+      // Wider gap between the two buttons.
+      toolBar.addSeparator( new Dimension( 50, 0 ) );
+      _stopButton = addButton( toolBar, "Stop " );
+      _stopButton.setEnabled( false );
+
+      toolBar.addSeparator( new Dimension( 50, 0 ) );
+      toolBar.addSeparator( new Dimension( 10, 0 ) );
+
+      return toolBar;
+   }
+
+   /**
+    * Creates a button and appends it to the tool bar, followed by a narrow separator.
+    *
+    * @param toolBar tool bar that receives the button.
+    * @param toolTip tool tip text for the button.
+    * @return the new button.
+    */
+   static private JButton addButton( final JToolBar toolBar, final String toolTip ) {
+      final JButton toolButton = new JButton();
+      toolButton.setToolTipText( toolTip );
+      // The button stays focusable; only the painting of its focus state is switched off.
+      toolButton.setFocusPainted( false );
+      toolBar.add( toolButton );
+      toolBar.addSeparator( new Dimension( 10, 0 ) );
+      return toolButton;
+   }
+
+
+   /**
+    * Runs the start command, optionally in a given directory, and logs through the panel logger.
+    */
+   private final class StartAction implements ActionListener {
+
+      private final String _name;
+      private final String _command;
+      private final String _dir;
+
+      /**
+       * @param name    application name, used in the log message.
+       * @param command command to run.
+       * @param dir     directory in which to run the command.  Ignored if null or empty.
+       */
+      private StartAction( final String name,
+                           final String command,
+                           final String dir ) {
+         _name = name;
+         _command = command;
+         _dir = dir;
+      }
+
+      /**
+       * {@inheritDoc}
+       */
+      @Override
+      public void actionPerformed( final ActionEvent event ) {
+         if ( _runButton == null ) {
+            return;
+         }
+         final SystemUtil.CommandRunner startRunner = new SystemUtil.CommandRunner( _command );
+         startRunner.setLogger( LOGGER );
+         startRunner.wait( true );
+         final boolean hasDirectory = _dir != null && !_dir.isEmpty();
+         if ( hasDirectory ) {
+            startRunner.setDirectory( _dir );
+         }
+         LOGGER.info( "Starting " + _name + "  ..." );
+         try {
+            SystemUtil.run( startRunner );
+         } catch ( IOException ioE ) {
+            LOGGER.error( ioE.getMessage() );
+         }
+      }
+
+   }
+
+
+   /**
+    * Runs the stop command, optionally in a given directory, and logs through the panel logger.
+    * The stop command is optional, so nothing is run when it is missing.
+    */
+   private final class StopAction implements ActionListener {
+
+      private final String _name;
+      private final String _command;
+      private final String _dir;
+
+      /**
+       * @param name    application name, used in the log messages.
+       * @param command command to run.  May be empty, in which case the action only logs a warning.
+       * @param dir     directory in which to run the command.  Ignored if null or empty.
+       */
+      private StopAction( final String name,
+                          final String command,
+                          final String dir ) {
+         _name = name;
+         _command = command;
+         _dir = dir;
+      }
+
+      /**
+       * {@inheritDoc}
+       */
+      @Override
+      public void actionPerformed( final ActionEvent event ) {
+         if ( _stopButton == null ) {
+            return;
+         }
+         if ( _command == null || _command.isEmpty() ) {
+            // Do not hand an empty command to the command runner.
+            LOGGER.warn( "No Stop Command was specified for " + _name );
+            return;
+         }
+         final SystemUtil.CommandRunner runner = new SystemUtil.CommandRunner( _command );
+         runner.setLogger( LOGGER );
+         runner.wait( true );
+         if ( _dir != null && !_dir.isEmpty() ) {
+            runner.setDirectory( _dir );
+         }
+         LOGGER.info( "Stopping " + _name + "  ..." );
+         try {
+            SystemUtil.run( runner );
+         } catch ( IOException ioE ) {
+            LOGGER.error( ioE.getMessage() );
+         }
+      }
+
+   }
+
+
+   /**
+    * Runnable that loads the icons of the run button and the stop button.
+    * <p>
+    * Some icons
+    * <a href="https://www.freepik.com/free-vector/no-entry-hand-sign-isolated-white_10601278.htm#query=stop%20hand&position=1&from_view=keyword">Image by macrovector</a> on Freepik
+    */
+   private final class ButtonIconLoader implements Runnable {
+
+      /**
+       * Loads both icons first and then sets them on the buttons.
+       */
+      @Override
+      public void run() {
+         final String iconDir = "org/apache/ctakes/gui/pipeline/icon/";
+         final Icon startIcon = IconLoader.loadIcon( iconDir + "RunPiper.png" );
+         final Icon haltIcon = IconLoader.loadIcon( iconDir + "StopHand.png" );
+         _runButton.setIcon( startIcon );
+         _stopButton.setIcon( haltIcon );
+      }
+
+   }
+
+
+}

Added: ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt?rev=1905443&view=auto
==============================================================================
--- ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt (added)
+++ ctakes/trunk/ctakes-smoking-status/data/Deprecated.txt Mon Nov 21 17:50:20 2022
@@ -0,0 +1,2 @@
+Consider the note files in this directory deprecated.
+Example clinical notes are now in the ctakes-examples-res project.

Added: ctakes/trunk/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifier.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifier.java?rev=1905443&view=auto
==============================================================================
--- ctakes/trunk/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifier.java (added)
+++ ctakes/trunk/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifier.java Mon Nov 21 17:50:20 2022
@@ -0,0 +1,265 @@
+package org.apache.ctakes.smokingstatus.ae;
+
+import libsvm.svm;
+import libsvm.svm_model;
+import libsvm.svm_node;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.util.log.DotLogger;
+import org.apache.ctakes.smokingstatus.type.libsvm.NominalAttributeValue;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.*;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import static org.apache.ctakes.smokingstatus.Const.*;
+
+
+/**
+ * Update of original PcsClassifierAnnotator_libsvm to use UimaFit.
+ *
+ * @author SPF , chip-nlp
+ * @since {6/3/2022}
+ */
+@PipeBitInfo(
+      name = "PcsClassifier",
+      description = "Uses SVM for smoking status classification.",
+      role = PipeBitInfo.Role.ANNOTATOR
+)
+public class PcsClassifier extends JCasAnnotator_ImplBase {
+
+   static private final Logger LOGGER = Logger.getLogger( "PcsClassifier" );
+
+   static public final String CASED_PARAM = "CaseSensitive";
+   static public final String CASED_DESC = "yes/no for case sensitivity.";
+   @ConfigurationParameter(
+         name = CASED_PARAM,
+         description = CASED_DESC,
+         mandatory = false,
+         defaultValue = "yes"
+   )
+   private String _caseSensitive;
+
+   static public final String STOP_WORDS_PARAM = "StopWordsPath";
+   static public final String STOP_WORDS_DESC = "Path to file containing stop words.";
+   @ConfigurationParameter(
+         name = STOP_WORDS_PARAM,
+         description = STOP_WORDS_DESC
+   )
+   private String _stopWordsPath;
+
+   static public final String KEY_WORDS_PARAM = "KeyWordsPath";
+   static public final String KEY_WORDS_DESC = "Path to file containing key words.";
+   @ConfigurationParameter(
+         name = KEY_WORDS_PARAM,
+         description = KEY_WORDS_DESC
+   )
+   private String _keyWordsPath;
+
+   static public final String MODEL_PARAM = "ModelPath";
+   static public final String MODEL_DESC = "Path to file containing the model.";
+   @ConfigurationParameter(
+         name = MODEL_PARAM,
+         description = MODEL_DESC
+   )
+   private String _modelPath;
+
+   static private final Map<Integer, String> SMOKER_CODES = new HashMap<>();
+
+   static private final Pattern SPACE_PATTERN = Pattern.compile( "\\s+" );
+   static private final Pattern TEXT_CLEANER_PATTERN = Pattern.compile( "[.?!:;()',\"{}<>#+]" );
+   static private final String[] DATE_REGEXES = {
+         "19\\d\\d", "19\\d\\ds", "20\\d\\d", "20\\d\\ds", "[1-9]0s", "\\d{1,2}[/-]\\d{1,2}",
+         "\\d{1,2}[/-]\\d{4}", "\\d{1,2}[/-]\\d{1,2}[/-]\\d{2}", "\\d{1,2}[/-]\\d{1,2}[/-]\\d{4}" };
+
+   static private final Collection<Pattern> DATE_PATTERNS = new ArrayList<>();
+
+   static {
+      for ( String regex : DATE_REGEXES ) {
+         DATE_PATTERNS.add( Pattern.compile( regex ) );
+      }
+      SMOKER_CODES.put( CLASS_CURR_SMOKER_INT, CLASS_CURR_SMOKER );
+      SMOKER_CODES.put( CLASS_PAST_SMOKER_INT, CLASS_PAST_SMOKER );
+      SMOKER_CODES.put( CLASS_SMOKER_INT, CLASS_SMOKER );
+   }
+
+   private boolean _isCaseSensitive = true;
+   private final Collection<String> _stopWords = new HashSet<>();
+   private final List<String> _keyWords = new ArrayList<>();
+   // Trained lib_svm model.
+   private svm_model _model;
+
+
+   /**
+    * {@inheritDoc}
+    * Resolves the case sensitivity setting, reads the stop words and the key words and loads the svm model.
+    */
+   @Override
+   public void initialize( final UimaContext context ) throws ResourceInitializationException {
+      super.initialize( context );
+      LOGGER.info( "Initializing ..." );
+      try ( DotLogger dotter = new DotLogger() ) {
+         // Only "no" and "false" switch case sensitivity off; anything else leaves it on.
+         final boolean caseSwitchedOff = _caseSensitive.equalsIgnoreCase( "no" )
+                                         || _caseSensitive.equalsIgnoreCase( "false" );
+         if ( caseSwitchedOff ) {
+            _isCaseSensitive = false;
+         }
+         parseFile( _stopWordsPath, _isCaseSensitive, _stopWords );
+         parseFile( _keyWordsPath, _isCaseSensitive, _keyWords );
+         final String modelFilePath = FileLocator.getFile( _modelPath )
+                                                 .getPath();
+         _model = svm.svm_load_model( modelFilePath );
+      } catch ( IOException ioE ) {
+         throw new ResourceInitializationException( ioE );
+      }
+   }
+
+   /**
+    * {@inheritDoc}
+    * Builds the feature vector from the key word features plus one date feature, lets the svm model
+    * predict the class and stores the class as a "smoking_status" {@link NominalAttributeValue}.
+    */
+   @Override
+   public void process( final JCas jcas ) throws AnalysisEngineProcessException {
+      LOGGER.info( "Processing ..." );
+      try ( DotLogger dotter = new DotLogger() ) {
+         final List<Double> features = createFeatures( jcas );
+         // Cannot access sentence by SentenceAnnotator or RecordSentence.  The document text is the sentence.
+         final String cleanedText = TEXT_CLEANER_PATTERN.matcher( jcas.getDocumentText() )
+                                                        .replaceAll( " " )
+                                                        .trim();
+         // The date feature is 1.0 as soon as any token looks like a date.
+         double dateInfo = 0.0;
+         for ( String textToken : SPACE_PATTERN.split( cleanedText ) ) {
+            boolean isDate = false;
+            for ( Pattern datePattern : DATE_PATTERNS ) {
+               if ( datePattern.matcher( textToken )
+                               .matches() ) {
+                  isDate = true;
+                  break;
+               }
+            }
+            if ( isDate ) {
+               dateInfo = 1.0;
+               LOGGER.info( "***dateInfo|" + textToken + "|" + dateInfo );
+               break;
+            }
+         }
+         features.add( dateInfo );
+         // set the libSVM feature vector.  Node indices start at 1.
+         final int featureCount = features.size();
+         final svm_node[] svm_nodes = new svm_node[ featureCount ];
+         for ( int j = 0; j < featureCount; j++ ) {
+            final svm_node node = new svm_node();
+            node.index = j + 1;
+            node.value = features.get( j );
+            svm_nodes[ j ] = node;
+         }
+         // 1:CURRENT_SMOKER, 2:PAST_SMOKER, 3:SMOKER
+         final double classLabel = svm.svm_predict( _model, svm_nodes );
+         // The integer part of the label is the key of the class name.
+         final int intClassLabel = Double.valueOf( classLabel )
+                                         .intValue();
+         final String classValue = SMOKER_CODES.get( intClassLabel );
+         LOGGER.info( "classLabel=" + classLabel + " intClassLabel" + intClassLabel + " classValue=" + classValue );
+         final NominalAttributeValue smokingStatus = new NominalAttributeValue( jcas );
+         smokingStatus.setAttributeName( "smoking_status" );
+         smokingStatus.setNominalValue( classValue );
+         smokingStatus.addToIndexes();
+      } catch ( IOException ioE ) {
+         throw new AnalysisEngineProcessException( ioE );
+      }
+   }
+
+   private List<Double> createFeatures( final JCas jcas ) {
+      final List<Double> features = new ArrayList<>();
+      final List<String> unigrams = createUnigrams( jcas );
+      final List<String> bigrams = new ArrayList<>();
+      for ( int i = 0; i < unigrams.size() - 1; i++ ) {
+         bigrams.add( unigrams.get( i ) + "_" + unigrams.get( i + 1 ) );
+      }
+      // unigram & bigram keywords
+      for ( String keyWord : _keyWords ) {
+         double value = 0.0;
+         if ( keyWord.contains( "_" ) ) {
+            if ( bigrams.stream()
+                        .anyMatch( keyWord::equalsIgnoreCase ) ) {
+               value = 1.0;
+               LOGGER.info( "keyWord=" + keyWord + " bigram=" + bigrams.stream()
+                                                                       .filter( keyWord::equalsIgnoreCase )
+                                                                       .collect(
+                                                                             Collectors.joining( " ; " ) ) );
+            }
+         } else {
+            if ( unigrams.stream()
+                         .anyMatch( keyWord::equalsIgnoreCase ) ) {
+               value = 1.0;
+               LOGGER.info( "keyWord=" + keyWord + " unigram=" + unigrams.stream()
+                                                                         .filter( keyWord::equalsIgnoreCase )
+                                                                         .collect(
+                                                                               Collectors.joining( " ; " ) ) );
+            }
+         }
+         features.add( value );
+      }
+      return features;
+   }
+
+   private List<String> createUnigrams( final JCas jcas ) {
+      final List<String> unigrams = new ArrayList<>();
+      final Collection<WordToken> wordTokens = JCasUtil.select( jcas, WordToken.class );
+      for ( WordToken token : wordTokens ) {
+         String tokenText = token.getCoveredText();
+         if ( tokenText == null || tokenText.isEmpty() ) {
+            continue;
+         }
+         // TODO - The following code CONDITIONALLY turns tokenText to lowercase,
+         //  while the subsequent code ALWAYS turns tokenText to lowercase.
+//            if ( !_isCaseSensitive ) {
+//               tokenText = tokenText.toLowerCase();
+//            }
+         // if(!stopWords.contains(tok)) unigrams.add(tok);
+         // -- this is the replace of the above line
+         // Since the model was trained on words without non-word characters
+         tokenText = tokenText.toLowerCase()
+                              .replaceAll( "-{2,}", " " )
+                              .trim();
+         // with
+         // the
+         // cases
+         // like:
+         // Tobacco--quit
+         // in
+         // 1980.
+         Arrays.stream( SPACE_PATTERN.split( tokenText ) )
+               .filter( t -> !_stopWords.contains( t ) )
+               .forEach( unigrams::add );
+      }
+      return unigrams;
+   }
+
+   /**
+    * Adds every line of a file to a collection.  Lines are added as they are, including empty lines.
+    *
+    * @param filePath        path to the file, resolved by the {@link FileLocator}.
+    * @param isCaseSensitive false to turn each line to lowercase before it is added.
+    * @param collection      collection to which the lines are added.
+    * @throws IOException if the file cannot be found or read.  The original exception is kept as the cause.
+    */
+   static private void parseFile( final String filePath,
+                                  final boolean isCaseSensitive,
+                                  final Collection<String> collection ) throws IOException {
+      try ( BufferedReader reader
+                  = new BufferedReader(
+            new InputStreamReader(
+                  FileLocator.getAsStream( filePath ) ) ) ) {
+         String line = reader.readLine();
+         while ( line != null ) {
+            if ( !isCaseSensitive ) {
+               line = line.toLowerCase();
+            }
+            collection.add( line );
+            line = reader.readLine();
+         }
+      } catch ( IOException ioE ) {
+         // Add the path for context, but keep the cause so that the stack trace is not lost.
+         throw new IOException( "Couldn't read " + filePath + " " + ioE.getMessage(), ioE );
+      }
+   }
+
+
+}

Modified: ctakes/trunk/ctakes-ytex-web/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-ytex-web/pom.xml?rev=1905443&r1=1905442&r2=1905443&view=diff
==============================================================================
--- ctakes/trunk/ctakes-ytex-web/pom.xml (original)
+++ ctakes/trunk/ctakes-ytex-web/pom.xml Mon Nov 21 17:50:20 2022
@@ -132,32 +132,63 @@
 	<build>
 		<!-- dirty hack to get resources into the classpath (because the *-res 
 			dependencies are empty) -->
-		<resources>
-			<resource>
-				<directory>${project.basedir}/../ctakes-ytex-res/src/main/resources</directory>
-			</resource>
-			<resource>
-				<directory>
-				${project.basedir}/../ctakes-ytex/target/classes
-				</directory>
-				<excludes>
-					<exclude>**/*.class</exclude>
-				</excludes>
-			</resource>
-		</resources>
+		<!--		REMOVED 04/28/2022 in favor of maven-resources-plugin method below.  SPF -->
+		<!--		<resources>-->
+		<!--			<resource>-->
+		<!--				<directory>${project.basedir}/../ctakes-ytex-res/src/main/resources</directory>-->
+		<!--			</resource>-->
+		<!--			<resource>-->
+		<!--				<directory>-->
+		<!--				${project.basedir}/../ctakes-ytex/target/classes-->
+		<!--				</directory>-->
+		<!--				<excludes>-->
+		<!--					<exclude>**/*.class</exclude>-->
+		<!--				</excludes>-->
+		<!--			</resource>-->
+		<!--		</resources>-->
 		<!-- dirty hack to get test resources into the classpath (because the *-res 
 			dependencies are empty) -->
-		<testResources>
-			<testResource>
-				<directory>
-				${project.basedir}/../ctakes-ytex/target/test-classes
-				</directory>
-				<excludes>
-					<exclude>**/*.class</exclude>
-				</excludes>
-			</testResource>
-		</testResources>
+		<!--		<testResources>-->
+		<!--			<testResource>-->
+		<!--				<directory>-->
+		<!--				${project.basedir}/../ctakes-ytex/target/test-classes-->
+		<!--				</directory>-->
+		<!--				<excludes>-->
+		<!--					<exclude>**/*.class</exclude>-->
+		<!--				</excludes>-->
+		<!--			</testResource>-->
+		<!--		</testResources>-->
 		<plugins>
+
+			<!--    ctakes-ytex-res is a separate module with its own code repo.
+        ytex-web wants its resources.  There is a dirty hack above, but below is a
+        different method that should produce a usable result.
+          Using the plugin instead of redirecting resources allows maven to appropriately
+        build a classpath.   -->
+			<plugin>
+				<artifactId>maven-resources-plugin</artifactId>
+				<version>3.0.2</version>
+				<executions>
+					<execution>
+						<id>copy-resources</id>
+						<phase>compile</phase>
+						<goals>
+							<goal>copy-resources</goal>
+						</goals>
+						<configuration>
+							<outputDirectory>${basedir}/target/classes</outputDirectory>
+							<resources>
+								<resource>
+									<directory>${basedir}/../ctakes-ytex-res/src/main/resources</directory>
+									<filtering>true</filtering>
+								</resource>
+							</resources>
+						</configuration>
+					</execution>
+				</executions>
+			</plugin>
+
+
 			<plugin>
 				<groupId>org.eclipse.jetty</groupId>
 				<artifactId>jetty-maven-plugin</artifactId>