You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2020/10/15 16:01:51 UTC
svn commit: r1882547 - in /ctakes/trunk:
ctakes-core/src/main/java/org/apache/ctakes/core/cr/
ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/
ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/
ctakes-gui/src/main/java/org/apac...
Author: seanfinan
Date: Thu Oct 15 16:01:51 2020
New Revision: 1882547
URL: http://svn.apache.org/viewvc?rev=1882547&view=rev
Log:
add StripQuotes parameter to AbstractFileTreeReader
add put(jcas) in IdentifiedAnnotationBuilder as synonym for build(jcas)
increase size of name label in FileChooserPanel
Change min synonym length in CuiTerm to 2
increase size of name label in CasedMainPanel
Modified:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/FileChooserPanel.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/CasedMainPanel.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/term/CuiTerm.java
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java Thu Oct 15 16:01:51 2020
@@ -138,6 +138,21 @@ abstract public class AbstractFileTreeRe
)
private int _patientLevel = 1;
+
+ /**
+ * Name of the optional configuration parameter that enables replacement of document-enclosing quote characters.
+ * Some document text, such as that created from csv files, is enclosed in quote characters.
+ * These quotes can negatively impact some AEs such as Sectionizers that work on plain text instead of tokens.
+ * Each replaced quote becomes a space character so that character indexes are maintained.
+ * The backing field is initialized to false.
+ */
+ public static final String STRIP_QUOTES = "StripQuotes";
+ @ConfigurationParameter(
+ name = STRIP_QUOTES,
+ description = "Replace document-enclosing quote characters with space characters.",
+ mandatory = false
+ )
+ private boolean _stripQuotes = false;
+
+
static protected final String UNKNOWN = "Unknown";
// For compatibility with sql db : Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff]
static private final DateFormat DATE_FORMAT = new SimpleDateFormat( "yyyy-MM-dd hh:mm:ss" );
@@ -471,6 +486,52 @@ abstract public class AbstractFileTreeRe
return docText;
}
+ /**
+  * Replaces document-enclosing quote characters with space characters when quote stripping is enabled.
+  * Enclosing double quotes are handled first, then enclosing single quotes.
+  *
+  * @param text document text
+  * @return the document text with document begin and end quote characters replaced with space characters if needed.
+  * The text is returned unchanged if quote stripping is disabled or the text is empty.
+  */
+ final protected String handleQuotedDoc( final String text ) {
+    if ( _stripQuotes && !text.isEmpty() ) {
+       final String withoutDoubleQuotes = handleQuotedDoc( text, '\"' );
+       return handleQuotedDoc( withoutDoubleQuotes, '\'' );
+    }
+    return text;
+ }
+
+ /**
+  * Replaces a pair of document-enclosing quote characters with space characters.
+  * The text is considered enclosed when, ignoring leading and trailing whitespace, it both begins and ends
+  * with the given quote character and those are two distinct characters.
+  * Surrounding whitespace and everything between the quotes are kept as-is, so character indexes are maintained.
+  *
+  * @param text document text
+  * @param quote quote character to replace with space characters.
+  * @return the document text with document begin and end quote characters replaced with space characters,
+  * or the unchanged text if it is not enclosed by a pair of the given quote character.
+  */
+ static private String handleQuotedDoc( final String text, final char quote ) {
+    final String trimmedText = text.trim();
+    final int lastIndex = trimmedText.length() - 1;
+    // A lone quote character cannot enclose anything, so at least two characters are required.
+    // Accepting a single character as both begin and end quote would lead to a
+    // StringIndexOutOfBoundsException once that character has been replaced.
+    if ( lastIndex < 1 || trimmedText.charAt( 0 ) != quote || trimmedText.charAt( lastIndex ) != quote ) {
+       return text;
+    }
+    LOGGER.debug( "Replacing document-enclosing quote characters " + quote + " ..." );
+    // The enclosing quotes are the first and last occurrences of the quote character in the untrimmed text,
+    // as only trimmed-away whitespace can precede the first or follow the last.
+    final StringBuilder unquotedText = new StringBuilder( text );
+    unquotedText.setCharAt( text.indexOf( quote ), ' ' );
+    unquotedText.setCharAt( text.lastIndexOf( quote ), ' ' );
+    return unquotedText.toString();
+ }
+
+
protected JCasBuilder getJCasBuilder( final File file ) {
final String id = createDocumentID( file, getValidExtensions() );
final String idPrefix = createDocumentIdPrefix( file, getRootDir() );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java Thu Oct 15 16:01:51 2020
@@ -44,6 +44,7 @@ final public class FileTreeReader extend
*/
protected void readFile( final JCas jCas, final File file ) throws IOException {
// Read the document text from the file.
String docText = readFile( file );
// Replace document-enclosing quote characters with spaces before any further text handling.
// NOTE(review): presumably the handleQuotedDoc inherited from AbstractFileTreeReader, which leaves the text
// unchanged unless the StripQuotes parameter is enabled - confirm.
+ docText = handleQuotedDoc( docText );
// NOTE(review): handleTextEol is not visible here; presumably it handles end-of-line characters - confirm.
docText = handleTextEol( docText );
jCas.setDocumentText( docText );
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PiperFileReader.java Thu Oct 15 16:01:51 2020
@@ -1,7 +1,6 @@
package org.apache.ctakes.core.pipeline;
-import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.log.DotLogger;
import org.apache.log4j.Logger;
@@ -33,7 +32,7 @@ import java.util.regex.Pattern;
* collectEntities
* writeXmis <i>output_directory</i>
* <i>output_directory</i> can be empty if
- * {@link XmiWriterCasConsumerCtakes#PARAM_OUTPUTDIR} ("OutputDirectory") was specified
+ * the "OutputDirectory" parameter was specified
* // and # and ! may be used to mark line comments
* </p>
* class names must be fully-specified with package unless they are in standard ctakes cr ae or cc packages,
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java Thu Oct 15 16:01:51 2020
@@ -198,6 +198,18 @@ final public class IdentifiedAnnotationB
/**
+ * Builds the IdentifiedAnnotation and stores it in the jCas.
+ * A synonym for {@link #build(JCas)}, to which it delegates.
+ *
+ * @param jcas the JCas in which the built IdentifiedAnnotation is stored.
+ * @return an IdentifiedAnnotation with properties specified or null if the cui or span are illegal.
+ */
+ public IdentifiedAnnotation put( final JCas jcas ) {
+ return build( jcas );
+ }
+
+ /**
+ * Builds the IdentifiedAnnotation and stores it in the jCas. The same as .put( jcas )
+ *
* @param jcas ye olde ...
* @return an IdentifiedAnnotation with properties specified or null if the cui or span are illegal.
*/
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/FileChooserPanel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/FileChooserPanel.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/FileChooserPanel.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/component/FileChooserPanel.java Thu Oct 15 16:01:51 2020
@@ -37,7 +37,7 @@ final public class FileChooserPanel exte
super( new BorderLayout( 10, 10 ) );
setBorder( new EmptyBorder( 2, 10, 2, 10 ) );
final JLabel label = new JLabel( name );
- label.setPreferredSize( new Dimension( 100, 0 ) );
+ label.setPreferredSize( new Dimension( 150, 0 ) );
label.setHorizontalAlignment( SwingConstants.TRAILING );
final JTextField textField = new JTextField( defaultDirectory );
textField.setEditable( false );
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/CasedMainPanel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/CasedMainPanel.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/CasedMainPanel.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/CasedMainPanel.java Thu Oct 15 16:01:51 2020
@@ -119,7 +119,7 @@ final class CasedMainPanel extends JPane
final JPanel panel = new JPanel( new BorderLayout( 10, 10 ) );
panel.setBorder( new EmptyBorder( 2, 10, 2, 10 ) );
final JLabel label = new JLabel( "Dictionary Name:" );
- label.setPreferredSize( new Dimension( 100, 0 ) );
+ label.setPreferredSize( new Dimension( 150, 0 ) );
label.setHorizontalAlignment( SwingConstants.TRAILING );
final JTextField textField = new JTextField( "custom" );
final JButton buildButton = new JButton( new BuildDictionaryAction( textField ) );
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/term/CuiTerm.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/term/CuiTerm.java?rev=1882547&r1=1882546&r2=1882547&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/term/CuiTerm.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/cased/term/CuiTerm.java Thu Oct 15 16:01:51 2020
@@ -22,7 +22,7 @@ import java.util.stream.Collectors;
@Immutable
final public class CuiTerm {
- static private final int MIN_SYNONYM_LENGTH = 3;
+ static private final int MIN_SYNONYM_LENGTH = 2;
static private final int MAX_SYNONYM_LENGTH = 79;
static private final int MAX_SYNONYM_TOKENS = 5;