You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/06/02 14:38:29 UTC

[tika] branch main updated: TIKA-3432 -- try to fix rare(?) localization issue in OOXMLParserTest

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 2635b95  TIKA-3432 -- try to fix rare(?) localization issue in OOXMLParserTest
2635b95 is described below

commit 2635b9560e7d295dc859c7528fe443b4847956a5
Author: tallison <ta...@apache.org>
AuthorDate: Wed Jun 2 10:38:08 2021 -0400

    TIKA-3432 -- try to fix rare(?) localization issue in OOXMLParserTest
---
 .../parser/microsoft/ooxml/OOXMLParserTest.java    | 99 +++++++++++-----------
 1 file changed, 49 insertions(+), 50 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index 94758bc..153f6fc 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -44,6 +44,7 @@ import javax.xml.transform.stream.StreamResult;
 
 import org.apache.poi.util.LocaleUtil;
 import org.junit.AfterClass;
+import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Ignore;
 import org.junit.Test;
@@ -77,14 +78,19 @@ public class OOXMLParserTest extends TikaTest {
     @BeforeClass
     public static void setUp() {
         USER_LOCALE = LocaleUtil.getUserLocale();
-        LocaleUtil.setUserLocale(Locale.US);
     }
 
     @AfterClass
     public static void tearDown() {
         LocaleUtil.setUserLocale(USER_LOCALE);
+        Locale.setDefault(USER_LOCALE);
     }
 
+    @Before
+    public void beforeEach() {
+        LocaleUtil.setUserLocale(Locale.US);
+        Locale.setDefault(Locale.US);
+    }
 
     @Test
     public void testExcel() throws Exception {
@@ -1456,55 +1462,48 @@ public class OOXMLParserTest extends TikaTest {
 
     @Test
     public void testXLSBVarious() throws Exception {
-        try {
-            LocaleUtil.setUserLocale(Locale.US);
-            //have to set to US because of a bug in POI for $   3.03 in Locale.ITALIAN
-            OfficeParserConfig officeParserConfig = new OfficeParserConfig();
-            officeParserConfig.setExtractMacros(true);
-            ParseContext parseContext = new ParseContext();
-            parseContext.set(OfficeParserConfig.class, officeParserConfig);
-            List<Metadata> metadataList =
-                    getRecursiveMetadata("testEXCEL_various.xlsb", parseContext);
-            assertEquals(4, metadataList.size());
-
-            String xml = metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT);
-            assertContains("<td>13</td>", xml);
-            assertContains("<td>13.1211231321</td>", xml);
-            assertContains("<td>$   3.03</td>", xml);
-            assertContains("<td>20%</td>", xml);
-            assertContains("<td>13.12</td>", xml);
-            assertContains("<td>123456789012345</td>", xml);
-            assertContains("<td>1.23456789012345E+15</td>", xml);
-            assertContains("test comment2", xml);
-
-            assertContains("comment4 (end of row)", xml);
-
-
-            assertContains("<td>1/4</td>", xml);
-            assertContains("<td>3/9/17</td>", xml);
-            assertContains("<td>4</td>", xml);
-            assertContains("<td>2</td>", xml);
-
-            assertContains("<td>   46/1963</td>", xml);
-            assertContains("<td>  3/128</td>", xml);
-            assertContains("test textbox", xml);
-
-            assertContains("test WordArt", xml);
-
-            assertContains("<a href=\"http://lucene.apache.org/\">http://lucene.apache.org/</a>",
-                    xml);
-            assertContains("<a href=\"http://tika.apache.org/\">http://tika.apache.org/</a>", xml);
-
-            assertContains("OddLeftHeader OddCenterHeader OddRightHeader", xml);
-            assertContains("EvenLeftHeader EvenCenterHeader EvenRightHeader", xml);
-
-            assertContains("FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader", xml);
-            assertContains("OddLeftFooter OddCenterFooter OddRightFooter", xml);
-            assertContains("EvenLeftFooter EvenCenterFooter EvenRightFooter", xml);
-            assertContains("FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter", xml);
-        } finally {
-            LocaleUtil.setUserLocale(USER_LOCALE);
-        }
+        OfficeParserConfig officeParserConfig = new OfficeParserConfig();
+        officeParserConfig.setExtractMacros(true);
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(OfficeParserConfig.class, officeParserConfig);
+        List<Metadata> metadataList = getRecursiveMetadata("testEXCEL_various.xlsb", parseContext);
+        assertEquals(4, metadataList.size());
+
+        String xml = metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT);
+        assertContains("<td>13</td>", xml);
+        assertContains("<td>13.1211231321</td>", xml);
+        assertContains("<td>$   3.03</td>", xml);
+        assertContains("<td>20%</td>", xml);
+        assertContains("<td>13.12</td>", xml);
+        assertContains("<td>123456789012345</td>", xml);
+        assertContains("<td>1.23456789012345E+15</td>", xml);
+        assertContains("test comment2", xml);
+
+        assertContains("comment4 (end of row)", xml);
+
+
+        assertContains("<td>1/4</td>", xml);
+        assertContains("<td>3/9/17</td>", xml);
+        assertContains("<td>4</td>", xml);
+        assertContains("<td>2</td>", xml);
+
+        assertContains("<td>   46/1963</td>", xml);
+        assertContains("<td>  3/128</td>", xml);
+        assertContains("test textbox", xml);
+
+        assertContains("test WordArt", xml);
+
+        assertContains("<a href=\"http://lucene.apache.org/\">http://lucene.apache.org/</a>", xml);
+        assertContains("<a href=\"http://tika.apache.org/\">http://tika.apache.org/</a>", xml);
+
+        assertContains("OddLeftHeader OddCenterHeader OddRightHeader", xml);
+        assertContains("EvenLeftHeader EvenCenterHeader EvenRightHeader", xml);
+
+        assertContains("FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader", xml);
+        assertContains("OddLeftFooter OddCenterFooter OddRightFooter", xml);
+        assertContains("EvenLeftFooter EvenCenterFooter EvenRightFooter", xml);
+        assertContains("FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter", xml);
+
     }
 
     @Test