You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2015/03/19 17:10:27 UTC
svn commit: r1667798 - in /uima/ruta/trunk/ruta-core/src/main:
java/org/apache/uima/ruta/engine/HtmlConverter.java
java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java
resources/org/apache/uima/ruta/engine/HtmlConverter.xml
Author: pkluegl
Date: Thu Mar 19 16:10:26 2015
New Revision: 1667798
URL: http://svn.apache.org/r1667798
Log:
UIMA-4286
- added configuration parameter "processAll" so that text outside of the body element can also be converted (bypasses the inBody check)
Modified:
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java
uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/HtmlConverter.xml
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java?rev=1667798&r1=1667797&r2=1667798&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java Thu Mar 19 16:10:26 2015
@@ -114,6 +114,15 @@ public class HtmlConverter extends JCasA
private Boolean skipWhitespaces;
/**
+ * This boolean parameter determines whether text nodes outside of the body element are processed as well.
+ */
+ public static final String PARAM_PROCESS_ALL = "processAll";
+
+ @ConfigurationParameter(name = PARAM_PROCESS_ALL, mandatory = false, defaultValue = "false")
+ private Boolean processAll;
+
+
+ /**
* This string parameter determines the character sequence that replaces a linebreak. The default
* behavior is the empty string.
*/
@@ -186,6 +195,8 @@ public class HtmlConverter extends JCasA
replaceLinebreaks = replaceLinebreaks == null ? true : replaceLinebreaks;
skipWhitespaces = (Boolean) aContext.getConfigParameterValue(PARAM_SKIP_WHITESPACES);
skipWhitespaces = skipWhitespaces == null ? true : skipWhitespaces;
+ processAll = (Boolean) aContext.getConfigParameterValue(PARAM_PROCESS_ALL);
+ processAll = processAll == null ? true : processAll;
linebreakReplacement = (String) aContext.getConfigParameterValue(PARAM_LINEBREAK_REPLACEMENT);
linebreakReplacement = linebreakReplacement == null ? "" : linebreakReplacement;
String conversionPolicy = (String) aContext.getConfigParameterValue(PARAM_CONVERSION_POLICY);
@@ -267,7 +278,7 @@ public class HtmlConverter extends JCasA
try {
Parser parser = new Parser(documentText);
NodeList list = parser.parse(null);
- HtmlConverterVisitor visitor = new HtmlConverterVisitor(newlineInducingTags, skipWhitespaces);
+ HtmlConverterVisitor visitor = new HtmlConverterVisitor(newlineInducingTags, skipWhitespaces, processAll);
list.visitAllNodesWith(visitor);
visibleSpansSoFar = visitor.getTextSpans();
linebreaksFromHtmlTags = visitor.getLinebreaksFromHtmlTags();
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java?rev=1667798&r1=1667797&r2=1667798&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverterVisitor.java Thu Mar 19 16:10:26 2015
@@ -45,15 +45,18 @@ public class HtmlConverterVisitor extend
private Collection<String> newlineInducingTags;
- public HtmlConverterVisitor(String[] newlineInducingTags, boolean skipWhitespace) {
+ private boolean processAll = true;
+
+ public HtmlConverterVisitor(String[] newlineInducingTags, boolean skipWhitespace, boolean processAll) {
this.newlineInducingTags = Arrays.asList(newlineInducingTags);
this.skipWhitespace = skipWhitespace;
+ this.processAll = processAll;
}
@Override
public void visitStringNode(Text node) {
super.visitStringNode(node);
- if (this.inBody && !this.inScript && (!skipWhitespace || !StringUtils.isBlank(node.getText()))) {
+ if ((processAll || this.inBody) && !this.inScript && (!skipWhitespace || !StringUtils.isBlank(node.getText()))) {
int from = node.getStartPosition();
int to = node.getEndPosition();
textSpans.add(new HtmlConverterPSpan(from, to, node.getText()));
Modified: uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/HtmlConverter.xml
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/HtmlConverter.xml?rev=1667798&r1=1667797&r2=1667798&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/HtmlConverter.xml (original)
+++ uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/HtmlConverter.xml Thu Mar 19 16:10:26 2015
@@ -1,5 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
-
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
@@ -83,6 +82,12 @@ Defaults to heuristic.</description>
<type>Boolean</type>
<multiValued>false</multiValued>
<mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>processAll</name>
+ <type>Boolean</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
</configurationParameter>
</configurationParameters>
<configurationParameterSettings>