You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/12/15 00:50:11 UTC

svn commit: r890562 - in /lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft: ExcelExtractor.java OfficeParser.java

Author: jukka
Date: Mon Dec 14 23:50:10 2009
New Revision: 890562

URL: http://svn.apache.org/viewvc?rev=890562&view=rev
Log:
TIKA-125: Pass Locale information to parsers

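Use the parse context to pass Locale settings to the Excel extractor:
OfficeParser looks up the Locale in the parse context (falling back to
the default locale) and ExcelExtractor uses it to create its NumberFormat.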

Modified:
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java?rev=890562&r1=890561&r2=890562&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java Mon Dec 14 23:50:10 2009
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -18,11 +18,11 @@
 
 import java.awt.Point;
 import java.io.IOException;
-import java.io.Serializable;
 import java.text.NumberFormat;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.SortedMap;
 import java.util.TreeMap;
@@ -113,10 +113,11 @@
      * @throws IOException if an error occurs processing the workbook
      * or writing the extracted content
      */
-    protected void parse(POIFSFileSystem filesystem, XHTMLContentHandler xhtml)
-            throws IOException, SAXException {
+    protected void parse(
+            POIFSFileSystem filesystem, XHTMLContentHandler xhtml,
+            Locale locale) throws IOException, SAXException {
         // Set up listener and register the records we want to process
-        TikaHSSFListener listener = new TikaHSSFListener(xhtml);
+        TikaHSSFListener listener = new TikaHSSFListener(xhtml, locale);
         HSSFRequest hssfRequest = new HSSFRequest();
         if (listenForAllRecords) {
             hssfRequest.addListenerForAllRecords(listener);
@@ -151,7 +152,7 @@
     /**
      * HSSF Listener implementation which processes the HSSF records.
      */
-    private static class TikaHSSFListener implements HSSFListener, Serializable {
+    private static class TikaHSSFListener implements HSSFListener {
 
         /**
          * XHTML content handler to which the document content is rendered.
@@ -191,7 +192,7 @@
          *
          * @see <a href="https://issues.apache.org/jira/browse/TIKA-103">TIKA-103</a>
          */
-        private final NumberFormat format = NumberFormat.getInstance();
+        private final NumberFormat format;
 
         /**
          * Construct a new listener instance outputting parsed data to
@@ -199,8 +200,9 @@
          *
          * @param handler Destination to write the parsed output to
          */
-        private TikaHSSFListener(XHTMLContentHandler handler) {
+        private TikaHSSFListener(XHTMLContentHandler handler, Locale locale) {
             this.handler = handler;
+            this.format = NumberFormat.getInstance(locale);
         }
 
         /**

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=890562&r1=890561&r2=890562&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java Mon Dec 14 23:50:10 2009
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -21,6 +21,7 @@
 import java.io.InputStream;
 import java.util.Date;
 import java.util.Iterator;
+import java.util.Locale;
 
 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
 import org.apache.poi.hpsf.CustomProperties;
@@ -111,7 +112,8 @@
                 xhtml.element("p", extractor.getText(true, true));
             } else if ("Workbook".equals(name)) {
                 setType(metadata, "application/vnd.ms-excel");
-                new ExcelExtractor().parse(filesystem, xhtml);
+                Locale locale = context.get(Locale.class, Locale.getDefault());
+                new ExcelExtractor().parse(filesystem, xhtml, locale);
             } else if ("VisioDocument".equals(name)) {
                 setType(metadata, "application/vnd.visio");
                 VisioTextExtractor extractor =