You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2020/10/15 16:01:51 UTC

svn commit: r1882547 - in /ctakes/trunk: ctakes-core/src/main/java/org/apache/ctakes/core/cr/ ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/ ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/ ctakes-gui/src/main/java/org/apac...

Author: seanfinan
Date: Thu Oct 15 16:01:51 2020
New Revision: 1882547

URL: http://svn.apache.org/viewvc?rev=1882547&view=rev
Log:
add StripQuotes parameter to AbstractFileTreeReader
add put(jcas) in IdentifiedAnnotationBuilder as synonym for build(jcas)
increase size of name label in FileChooserPanel
Change min synonym length in CuiTerm to 2
increase size of name label in CasedMainPanel

Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/FileChooserPanel.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/CasedMainPanel.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/term/CuiTerm.java

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java Thu Oct 15 16:01:51 2020
@@ -138,6 +138,21 @@ abstract public class AbstractFileTreeRe
    )
    private int _patientLevel = 1;
 
+
+   /**
+    * Optional parameter name.  Some document text, such as that created from csv files, is enclosed in quotes.
+    * These quotes can negatively impact some AEs such as Sectionizers that work on plain text instead of tokens.
+    * When true, enclosing quotes are replaced with spaces (not removed) to maintain character indexes.  Default false.
+    */
+   public static final String STRIP_QUOTES = "StripQuotes";
+   @ConfigurationParameter(
+         name = STRIP_QUOTES,
+         description = "Replace document-enclosing quote characters with space characters.",
+         mandatory = false
+   )
+   private boolean _stripQuotes = false;
+
+
    static protected final String UNKNOWN = "Unknown";
    //   For compatibility with sql db : Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff]
    static private final DateFormat DATE_FORMAT = new SimpleDateFormat( "yyyy-MM-dd hh:mm:ss" );  // NOTE(review): "hh" is the 12-hour clock (1-12) in SimpleDateFormat; "HH" (0-23) may be intended - confirm
@@ -471,6 +486,52 @@ abstract public class AbstractFileTreeRe
       return docText;
    }
 
+   /**
+    * Replaces document-enclosing double quotes, then single quotes, with spaces when StripQuotes is enabled.
+    * @param text document text
+    * @return the text as given, or the text with enclosing quote characters replaced by space characters
+    */
+   final protected String handleQuotedDoc( final String text ) {
+      if ( _stripQuotes && !text.isEmpty() ) {
+         return handleQuotedDoc( handleQuotedDoc( text, '\"' ), '\'' );
+      }
+      return text;
+   }
+
+   /**
+    * Replaces a pair of document-enclosing quote characters with space characters.
+    * Whitespace around the quotes is ignored when detecting them and is kept in the returned text,
+    * which always has the same length as the given text.  A lone quote character is not a pair and is left alone.
+    *
+    * @param text  document text
+    * @param quote quote character to replace with space characters.
+    * @return the document text with document begin and end quote characters replaced with space characters if needed
+    */
+   static private String handleQuotedDoc( final String text, final char quote ) {
+      final String docText = text.trim();
+      // An enclosing pair needs 2 characters.  A lone quote would pass for both the begin and the end quote,
+      // and the end quote could then not be found once the begin quote had been replaced.
+      if ( docText.length() < 2 ) {
+         return text;
+      }
+      if ( docText.charAt( 0 ) != quote ) {
+         return text;
+      }
+      if ( docText.charAt( docText.length() - 1 ) != quote ) {
+         return text;
+      }
+      LOGGER.debug( "Replacing document-enclosing quote characters " + quote + " ..." );
+      // Work on the untrimmed text so that surrounding whitespace is kept.
+      // The enclosing quotes are the first and last occurrences of the quote character in that text.
+      final int beginQuote = text.indexOf( quote );
+      final int endQuote = text.lastIndexOf( quote );
+      final char[] chars = text.toCharArray();
+      chars[ beginQuote ] = ' ';
+      chars[ endQuote ] = ' ';
+      return new String( chars );
+   }
+
+
    protected JCasBuilder getJCasBuilder( final File file ) {
       final String id = createDocumentID( file, getValidExtensions() );
       final String idPrefix = createDocumentIdPrefix( file, getRootDir() );

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java Thu Oct 15 16:01:51 2020
@@ -44,6 +44,7 @@ final public class FileTreeReader extend
     */
    protected void readFile( final JCas jCas, final File file ) throws IOException {
       String docText = readFile( file );
+      docText = handleQuotedDoc( docText );  // replaces document-enclosing quotes with spaces; a no-op unless StripQuotes is set
       docText = handleTextEol( docText );
       jCas.setDocumentText( docText );
    }

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java Thu Oct 15 16:01:51 2020
@@ -1,7 +1,6 @@
 package org.apache.ctakes.core.pipeline;
 
 
-import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
 import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.core.util.log.DotLogger;
 import org.apache.log4j.Logger;
@@ -33,7 +32,7 @@ import java.util.regex.Pattern;
  * collectEntities
  * writeXmis <i>output_directory</i>
  *    <i>output_directory</i> can be empty if
- *    {@link XmiWriterCasConsumerCtakes#PARAM_OUTPUTDIR} ("OutputDirectory") was specified
+ *    the "OutputDirectory" parameter was specified
  * // and # and ! may be used to mark line comments
  * </p>
  * class names must be fully-specified with package unless they are in standard ctakes cr ae or cc packages,

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java Thu Oct 15 16:01:51 2020
@@ -198,6 +198,18 @@ final public class IdentifiedAnnotationB
 
 
    /**
+    * Builds the IdentifiedAnnotation and stores it in the jCas.  A synonym that delegates to .build( jcas )
+    *
+    * @param jcas the JCas in which the built annotation is stored.
+    * @return an IdentifiedAnnotation with properties specified or null if the cui or span are illegal.
+    */
+   public IdentifiedAnnotation put( final JCas jcas ) {
+      return build( jcas );
+   }
+
+   /**
+    * Builds the IdentifiedAnnotation and stores it in the jCas.  The same as .put( jcas )
+    *
     * @param jcas ye olde ...
     * @return an IdentifiedAnnotation with properties specified or null if the cui or span are illegal.
     */

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/FileChooserPanel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/FileChooserPanel.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/FileChooserPanel.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/FileChooserPanel.java Thu Oct 15 16:01:51 2020
@@ -37,7 +37,7 @@ final public class FileChooserPanel exte
       super( new BorderLayout( 10, 10 ) );
       setBorder( new EmptyBorder( 2, 10, 2, 10 ) );
       final JLabel label = new JLabel( name );
-      label.setPreferredSize( new Dimension( 100, 0 ) );
+      label.setPreferredSize( new Dimension( 150, 0 ) );
       label.setHorizontalAlignment( SwingConstants.TRAILING );
       final JTextField textField = new JTextField( defaultDirectory );
       textField.setEditable( false );

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/CasedMainPanel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/CasedMainPanel.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/CasedMainPanel.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/CasedMainPanel.java Thu Oct 15 16:01:51 2020
@@ -119,7 +119,7 @@ final class CasedMainPanel extends JPane
       final JPanel panel = new JPanel( new BorderLayout( 10, 10 ) );
       panel.setBorder( new EmptyBorder( 2, 10, 2, 10 ) );
       final JLabel label = new JLabel( "Dictionary Name:" );
-      label.setPreferredSize( new Dimension( 100, 0 ) );
+      label.setPreferredSize( new Dimension( 150, 0 ) );
       label.setHorizontalAlignment( SwingConstants.TRAILING );
       final JTextField textField = new JTextField( "custom" );
       final JButton buildButton = new JButton( new BuildDictionaryAction( textField ) );

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/term/CuiTerm.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/term/CuiTerm.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/term/CuiTerm.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/term/CuiTerm.java Thu Oct 15 16:01:51 2020
@@ -22,7 +22,7 @@ import java.util.stream.Collectors;
 @Immutable
 final public class CuiTerm {
 
-   static private final int MIN_SYNONYM_LENGTH = 3;
+   static private final int MIN_SYNONYM_LENGTH = 2;
    static private final int MAX_SYNONYM_LENGTH = 79;
    static private final int MAX_SYNONYM_TOKENS = 5;