You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2015/03/19 17:10:27 UTC

svn commit: r1667798 - in /uima/ruta/trunk/ruta-core/src/main: java/org/apache/uima/ruta/engine/HtmlConverter.java java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java resources/org/apache/uima/ruta/engine/HtmlConverter.xml

Author: pkluegl
Date: Thu Mar 19 16:10:26 2015
New Revision: 1667798

URL: http://svn.apache.org/r1667798
Log:
UIMA-4286
- added configuration parameter "processAll" to bypass the inBody check, so that text outside the body is converted as well

Modified:
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java
    uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/HtmlConverter.xml

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java?rev=1667798&r1=1667797&r2=1667798&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java Thu Mar 19 16:10:26 2015
@@ -114,6 +114,15 @@ public class HtmlConverter extends JCasA
   private Boolean skipWhitespaces;
 
   /**
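+   * If this boolean parameter is true, text nodes are converted even when not inside the body (the inBody check is skipped).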
+   */
+  public static final String PARAM_PROCESS_ALL = "processAll";
+
+  @ConfigurationParameter(name = PARAM_PROCESS_ALL, mandatory = false, defaultValue = "false")
+  private Boolean processAll;
+
+  
+  /**
    * This string parameter determines the character sequence that replaces a linebreak. The default
    * behavior is the empty string.
    */
@@ -186,6 +195,8 @@ public class HtmlConverter extends JCasA
     replaceLinebreaks = replaceLinebreaks == null ? true : replaceLinebreaks;
     skipWhitespaces = (Boolean) aContext.getConfigParameterValue(PARAM_SKIP_WHITESPACES);
     skipWhitespaces = skipWhitespaces == null ? true : skipWhitespaces;
+    processAll = (Boolean) aContext.getConfigParameterValue(PARAM_PROCESS_ALL);
+    processAll = processAll == null ? true : processAll;
     linebreakReplacement = (String) aContext.getConfigParameterValue(PARAM_LINEBREAK_REPLACEMENT);
     linebreakReplacement = linebreakReplacement == null ? "" : linebreakReplacement;
     String conversionPolicy = (String) aContext.getConfigParameterValue(PARAM_CONVERSION_POLICY);
@@ -267,7 +278,7 @@ public class HtmlConverter extends JCasA
     try {
       Parser parser = new Parser(documentText);
       NodeList list = parser.parse(null);
-      HtmlConverterVisitor visitor = new HtmlConverterVisitor(newlineInducingTags, skipWhitespaces);
+      HtmlConverterVisitor visitor = new HtmlConverterVisitor(newlineInducingTags, skipWhitespaces, processAll);
       list.visitAllNodesWith(visitor);
       visibleSpansSoFar = visitor.getTextSpans();
       linebreaksFromHtmlTags = visitor.getLinebreaksFromHtmlTags();

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java?rev=1667798&r1=1667797&r2=1667798&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java Thu Mar 19 16:10:26 2015
@@ -45,15 +45,18 @@ public class HtmlConverterVisitor extend
 
   private Collection<String> newlineInducingTags;
 
-  public HtmlConverterVisitor(String[] newlineInducingTags, boolean skipWhitespace) {
+  private boolean processAll = true;
+
+  public HtmlConverterVisitor(String[] newlineInducingTags, boolean skipWhitespace, boolean processAll) {
     this.newlineInducingTags = Arrays.asList(newlineInducingTags);
     this.skipWhitespace = skipWhitespace;
+    this.processAll = processAll;
   }
 
   @Override
   public void visitStringNode(Text node) {
     super.visitStringNode(node);
-    if (this.inBody && !this.inScript && (!skipWhitespace || !StringUtils.isBlank(node.getText()))) {
+    if ((processAll || this.inBody) && !this.inScript && (!skipWhitespace || !StringUtils.isBlank(node.getText()))) {
       int from = node.getStartPosition();
       int to = node.getEndPosition();
       textSpans.add(new HtmlConverterPSpan(from, to, node.getText()));

Modified: uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/HtmlConverter.xml
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/HtmlConverter.xml?rev=1667798&r1=1667797&r2=1667798&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/HtmlConverter.xml (original)
+++ uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/HtmlConverter.xml Thu Mar 19 16:10:26 2015
@@ -1,5 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
-
 <!--
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
@@ -83,6 +82,12 @@ Defaults to heuristic.</description>
         <type>Boolean</type>
         <multiValued>false</multiValued>
         <mandatory>false</mandatory>
+      </configurationParameter>
+    <configurationParameter>
+        <name>processAll</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
       </configurationParameter>
     </configurationParameters>
     <configurationParameterSettings>