You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/12/15 00:50:11 UTC
svn commit: r890562 - in /lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft: ExcelExtractor.java OfficeParser.java
Author: jukka
Date: Mon Dec 14 23:50:10 2009
New Revision: 890562
URL: http://svn.apache.org/viewvc?rev=890562&view=rev
Log:
TIKA-125: Pass Locale information to parsers
Use the parse context to pass Locale settings
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java?rev=890562&r1=890561&r2=890562&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java Mon Dec 14 23:50:10 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -18,11 +18,11 @@
import java.awt.Point;
import java.io.IOException;
-import java.io.Serializable;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
@@ -113,10 +113,11 @@
* @throws IOException if an error occurs processing the workbook
* or writing the extracted content
*/
- protected void parse(POIFSFileSystem filesystem, XHTMLContentHandler xhtml)
- throws IOException, SAXException {
+ protected void parse(
+ POIFSFileSystem filesystem, XHTMLContentHandler xhtml,
+ Locale locale) throws IOException, SAXException {
// Set up listener and register the records we want to process
- TikaHSSFListener listener = new TikaHSSFListener(xhtml);
+ TikaHSSFListener listener = new TikaHSSFListener(xhtml, locale);
HSSFRequest hssfRequest = new HSSFRequest();
if (listenForAllRecords) {
hssfRequest.addListenerForAllRecords(listener);
@@ -151,7 +152,7 @@
/**
* HSSF Listener implementation which processes the HSSF records.
*/
- private static class TikaHSSFListener implements HSSFListener, Serializable {
+ private static class TikaHSSFListener implements HSSFListener {
/**
* XHTML content handler to which the document content is rendered.
@@ -191,7 +192,7 @@
*
* @see <a href="https://issues.apache.org/jira/browse/TIKA-103">TIKA-103</a>
*/
- private final NumberFormat format = NumberFormat.getInstance();
+ private final NumberFormat format;
/**
* Construct a new listener instance outputting parsed data to
@@ -199,8 +200,9 @@
*
* @param handler Destination to write the parsed output to
*/
- private TikaHSSFListener(XHTMLContentHandler handler) {
+ private TikaHSSFListener(XHTMLContentHandler handler, Locale locale) {
this.handler = handler;
+ this.format = NumberFormat.getInstance(locale);
}
/**
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=890562&r1=890561&r2=890562&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java Mon Dec 14 23:50:10 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -21,6 +21,7 @@
import java.io.InputStream;
import java.util.Date;
import java.util.Iterator;
+import java.util.Locale;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpsf.CustomProperties;
@@ -111,7 +112,8 @@
xhtml.element("p", extractor.getText(true, true));
} else if ("Workbook".equals(name)) {
setType(metadata, "application/vnd.ms-excel");
- new ExcelExtractor().parse(filesystem, xhtml);
+ Locale locale = context.get(Locale.class, Locale.getDefault());
+ new ExcelExtractor().parse(filesystem, xhtml, locale);
} else if ("VisioDocument".equals(name)) {
setType(metadata, "application/vnd.visio");
VisioTextExtractor extractor =