You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2017/01/03 16:26:02 UTC

svn commit: r1777155 - in /uima/ruta/trunk/ruta-core/src: main/java/org/apache/uima/ruta/engine/HtmlConverter.java test/java/org/apache/uima/ruta/engine/HtmlConverterTest.java test/java/org/apache/uima/ruta/engine/HtmlConverterXmlTest.java

Author: pkluegl
Date: Tue Jan  3 16:26:02 2017
New Revision: 1777155

URL: http://svn.apache.org/viewvc?rev=1777155&view=rev
Log:
UIMA-5147
- refactored config params
- added test

Modified:
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/HtmlConverterTest.java
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/HtmlConverterXmlTest.java

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java?rev=1777155&r1=1777154&r2=1777155&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/HtmlConverter.java Tue Jan  3 16:26:02 2017
@@ -83,7 +83,7 @@ public class HtmlConverter extends JCasA
    */
   public static final String PARAM_OUTPUT_VIEW = "outputView";
 
-  @ConfigurationParameter(name = PARAM_OUTPUT_VIEW, mandatory = false, defaultValue = DEFAULT_MODIFIED_VIEW)
+  @ConfigurationParameter(name = PARAM_OUTPUT_VIEW, mandatory = true, defaultValue = DEFAULT_MODIFIED_VIEW)
   private String modifiedViewName;
 
   /**
@@ -101,7 +101,7 @@ public class HtmlConverter extends JCasA
   public static final String PARAM_REPLACE_LINEBREAKS = "replaceLinebreaks";
 
   @ConfigurationParameter(name = PARAM_REPLACE_LINEBREAKS, mandatory = false, defaultValue = "true")
-  private Boolean replaceLinebreaks;
+  private boolean replaceLinebreaks;
 
   /**
    * This boolean parameter determines if the converter should skip whitespaces. Html documents
@@ -113,7 +113,7 @@ public class HtmlConverter extends JCasA
   public static final String PARAM_SKIP_WHITESPACES = "skipWhitespaces";
 
   @ConfigurationParameter(name = PARAM_SKIP_WHITESPACES, mandatory = false, defaultValue = "true")
-  private Boolean skipWhitespaces;
+  private boolean skipWhitespaces;
 
   /**
    * If this boolean parameter is set to true, then the tags of the complete document is processed
@@ -121,8 +121,8 @@ public class HtmlConverter extends JCasA
    */
   public static final String PARAM_PROCESS_ALL = "processAll";
 
-  @ConfigurationParameter(name = PARAM_PROCESS_ALL, mandatory = false, defaultValue = "false")
-  private Boolean processAll;
+  @ConfigurationParameter(name = PARAM_PROCESS_ALL, mandatory = true, defaultValue = "false")
+  private boolean processAll;
 
   /**
    * If this boolean parameter is set to true, then zero-length annotation will not be dropped, but
@@ -131,8 +131,8 @@ public class HtmlConverter extends JCasA
    */
   public static final String PARAM_EXPAND_OFFSETS = "expandOffsets";
 
-  @ConfigurationParameter(name = PARAM_EXPAND_OFFSETS, mandatory = false, defaultValue = "false")
-  private Boolean expandOffsets;
+  @ConfigurationParameter(name = PARAM_EXPAND_OFFSETS, mandatory = true, defaultValue = "false")
+  private boolean expandOffsets;
 
   /**
    * This string parameter determines the character sequence that replaces a linebreak. The default
@@ -165,11 +165,11 @@ public class HtmlConverter extends JCasA
 
   /**
    * This string array parameter sets the names of the html tags that create additional text in the
-   * output view. The acutal string of the gap is defined by the parameter <code>gapText</code>.
+   * output view. The actual string of the gap is defined by the parameter <code>gapText</code>.
    */
   public static final String PARAM_GAP_INDUCING_TAGS = "gapInducingTags";
 
-  @ConfigurationParameter(name = PARAM_GAP_INDUCING_TAGS, mandatory = false)
+  @ConfigurationParameter(name = PARAM_GAP_INDUCING_TAGS, mandatory = true, defaultValue = {})
   private String[] gapInducingTags;
 
   /**
@@ -178,7 +178,7 @@ public class HtmlConverter extends JCasA
    */
   public static final String PARAM_GAP_TEXT = "gapText";
 
-  @ConfigurationParameter(name = PARAM_GAP_TEXT, mandatory = false, defaultValue = "")
+  @ConfigurationParameter(name = PARAM_GAP_TEXT, mandatory = true, defaultValue = "")
   private String gapText;
 
   /**
@@ -186,8 +186,8 @@ public class HtmlConverter extends JCasA
    */
   public static final String PARAM_USE_SPACE_GAP = "useSpaceGap";
 
-  @ConfigurationParameter(name = PARAM_USE_SPACE_GAP, mandatory = false, defaultValue = "")
-  private Boolean useSpaceGap;
+  @ConfigurationParameter(name = PARAM_USE_SPACE_GAP, mandatory = true, defaultValue = "false")
+  private boolean useSpaceGap;
 
   /**
    * This string array parameter can be used to apply custom conversions. It defaults to a list of
@@ -212,7 +212,7 @@ public class HtmlConverter extends JCasA
    */
   public static final String PARAM_CONVERSION_POLICY = "conversionPolicy";
 
-  @ConfigurationParameter(name = PARAM_CONVERSION_POLICY, mandatory = false, defaultValue = "heuristic")
+  @ConfigurationParameter(name = PARAM_CONVERSION_POLICY, mandatory = true, defaultValue = "heuristic")
   private String conversionPolicy;
 
   /**
@@ -232,17 +232,6 @@ public class HtmlConverter extends JCasA
   @Override
   public void initialize(UimaContext aContext) throws ResourceInitializationException {
     super.initialize(aContext);
-    inputViewName = (String) aContext.getConfigParameterValue(PARAM_INPUT_VIEW);
-    inputViewName = StringUtils.isBlank(inputViewName) ? null : inputViewName;
-    modifiedViewName = (String) aContext.getConfigParameterValue(PARAM_OUTPUT_VIEW);
-    modifiedViewName = StringUtils.isBlank(modifiedViewName) ? DEFAULT_MODIFIED_VIEW
-            : modifiedViewName;
-    replaceLinebreaks = (Boolean) aContext.getConfigParameterValue(PARAM_REPLACE_LINEBREAKS);
-    replaceLinebreaks = replaceLinebreaks == null ? true : replaceLinebreaks;
-    skipWhitespaces = (Boolean) aContext.getConfigParameterValue(PARAM_SKIP_WHITESPACES);
-    skipWhitespaces = skipWhitespaces == null ? true : skipWhitespaces;
-    processAll = (Boolean) aContext.getConfigParameterValue(PARAM_PROCESS_ALL);
-    processAll = processAll == null ? true : processAll;
     linebreakReplacement = (String) aContext.getConfigParameterValue(PARAM_LINEBREAK_REPLACEMENT);
     linebreakReplacement = linebreakReplacement == null ? "" : linebreakReplacement;
     String conversionPolicy = (String) aContext.getConfigParameterValue(PARAM_CONVERSION_POLICY);
@@ -254,23 +243,11 @@ public class HtmlConverter extends JCasA
       throw new ResourceInitializationException("illegal conversionPolicy parameter value",
               new Object[0]);
     }
-    String[] nlTags = (String[]) aContext.getConfigParameterValue(PARAM_NEWLINE_INDUCING_TAGS);
-    if (nlTags == null) {
-      newlineInducingTags = new String[] { "br", "p", "div", "ul", "ol", "dl", "li", "h1", "h2",
-          "h3", "h4", "h5", "h6", "blockquote" };
-
-    }
     // check assertions
     if (modifiedViewName.equals(inputViewName)) {
       throw new ResourceInitializationException("input and output view names must differ!",
               new Object[0]);
     }
-    conversionPatterns = (String[]) aContext.getConfigParameterValue(PARAM_CONVERSION_PATTERNS);
-    if (conversionPatterns == null) {
-      conversionPatterns = new String[] { "&nbsp;", "&laquo;", "&raquo;", "&quot;", "&amp;",
-          "&lt;", "&gt;", "&apos;", "&sect;", "&uml;", "&copy;", "&trade;", "&reg;", "&ouml;",
-          "&auml;", "&uuml;", "&#160;" };
-    }
     conversionReplacements = (String[]) aContext
             .getConfigParameterValue(PARAM_CONVERSION_REPLACEMENTS);
     if (conversionReplacements == null) {
@@ -282,24 +259,11 @@ public class HtmlConverter extends JCasA
       }
     }
 
-    gapText = (String) aContext.getConfigParameterValue(PARAM_GAP_TEXT);
-    gapText = gapText == null ? "" : gapText;
-
-    useSpaceGap = (Boolean) aContext.getConfigParameterValue(PARAM_USE_SPACE_GAP);
-    useSpaceGap = useSpaceGap == null ? false : useSpaceGap;
 
     if (useSpaceGap) {
       gapText = " ";
     }
 
-    gapInducingTags = (String[]) aContext.getConfigParameterValue(PARAM_GAP_INDUCING_TAGS);
-    gapInducingTags = gapInducingTags == null ? new String[0] : gapInducingTags;
-
-    expandOffsets = (Boolean) aContext.getConfigParameterValue(PARAM_EXPAND_OFFSETS);
-    expandOffsets = expandOffsets == null ? false : expandOffsets;
-
-    newlineInducingTagRegExp = (String) aContext
-            .getConfigParameterValue(PARAM_NEWLINE_INDUCING_TAG_REGEXP);
   }
 
   @Override

Modified: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/HtmlConverterTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/HtmlConverterTest.java?rev=1777155&r1=1777154&r2=1777155&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/HtmlConverterTest.java (original)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/HtmlConverterTest.java Tue Jan  3 16:26:02 2017
@@ -515,4 +515,34 @@ public class HtmlConverterTest {
     // fini
     cas.release();
   }
+  
+  @Test
+  public void testStyle() throws Exception {
+    String html = "<html><head>\n" + "<style>\n" + "/*  */\n" + ".test {\n" + "   text-align: left;\n" + "}\n"
+            + "</style>\n" + "</head><body>Hello world</body></html>";
+    URL url = HtmlConverter.class.getClassLoader().getResource("HtmlConverter.xml");
+    if (url == null) {
+      url = HtmlConverter.class.getClassLoader().getResource(
+              "org/apache/uima/ruta/engine/HtmlConverter.xml");
+    }
+    XMLInputSource in = new XMLInputSource(url);
+    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
+    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
+    CAS cas = ae.newCAS();
+
+    ae.setConfigParameterValue(HtmlConverter.PARAM_OUTPUT_VIEW, outputViewName);
+    ae.reconfigure();
+    cas.reset();
+    cas.setDocumentText(html);
+
+    ae.process(cas);
+
+    CAS modifiedView = cas.getView(outputViewName);
+    String text = modifiedView.getDocumentText();
+
+    String expectedText = "Hello world";
+    assertEquals(expectedText, text);
+
+    cas.release();
+  }
 }

Modified: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/HtmlConverterXmlTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/HtmlConverterXmlTest.java?rev=1777155&r1=1777154&r2=1777155&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/HtmlConverterXmlTest.java (original)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/HtmlConverterXmlTest.java Tue Jan  3 16:26:02 2017
@@ -47,14 +47,14 @@ public class HtmlConverterXmlTest {
 
     URL urlA = HtmlAnnotator.class.getClassLoader().getResource("HtmlAnnotator.xml");
     if (urlA == null) {
-      urlA = HtmlAnnotator.class.getClassLoader().getResource(
-              "org/apache/uima/ruta/engine/HtmlAnnotator.xml");
+      urlA = HtmlAnnotator.class.getClassLoader()
+              .getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
     }
 
     URL urlC = HtmlAnnotator.class.getClassLoader().getResource("HtmlConverter.xml");
     if (urlC == null) {
-      urlC = HtmlAnnotator.class.getClassLoader().getResource(
-              "org/apache/uima/ruta/engine/HtmlConverter.xml");
+      urlC = HtmlAnnotator.class.getClassLoader()
+              .getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
     }
 
     XMLInputSource inA = new XMLInputSource(urlA);
@@ -68,8 +68,8 @@ public class HtmlConverterXmlTest {
     AnalysisEngine aeC = UIMAFramework.produceAnalysisEngine(specifierC);
     aeC.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
     aeC.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
-    aeC.setConfigParameterValue(HtmlConverter.PARAM_GAP_INDUCING_TAGS, new String[] { "child1",
-        "child2", "child3" });
+    aeC.setConfigParameterValue(HtmlConverter.PARAM_GAP_INDUCING_TAGS,
+            new String[] { "child1", "child2", "child3" });
     aeC.setConfigParameterValue(HtmlConverter.PARAM_GAP_TEXT, "$");
     aeC.reconfigure();
 
@@ -107,14 +107,14 @@ public class HtmlConverterXmlTest {
 
     URL urlA = HtmlAnnotator.class.getClassLoader().getResource("HtmlAnnotator.xml");
     if (urlA == null) {
-      urlA = HtmlAnnotator.class.getClassLoader().getResource(
-              "org/apache/uima/ruta/engine/HtmlAnnotator.xml");
+      urlA = HtmlAnnotator.class.getClassLoader()
+              .getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
     }
 
     URL urlC = HtmlAnnotator.class.getClassLoader().getResource("HtmlConverter.xml");
     if (urlC == null) {
-      urlC = HtmlAnnotator.class.getClassLoader().getResource(
-              "org/apache/uima/ruta/engine/HtmlConverter.xml");
+      urlC = HtmlAnnotator.class.getClassLoader()
+              .getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
     }
 
     XMLInputSource inA = new XMLInputSource(urlA);
@@ -161,9 +161,11 @@ public class HtmlConverterXmlTest {
     next = iterator.next();
     boolean b2 = next.getBooleanValue(expandedFeature);
     assertEquals("More content.", next.getCoveredText());
-    // for one of these two annotation (with same offsets) the feature must be set to true 
+    // for one of these two annotations (with same offsets) the feature must be set to true
     assertEquals(true, b1 || b2);
 
     cas.release();
   }
+
+
 }