You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/24 14:20:08 UTC
tika git commit: TIKA-2019 -- clean up -- move state variables to inner classes, convert protected to package private, add @Override on parse
Repository: tika
Updated Branches:
refs/heads/master 81279a1e0 -> 2031de70c
TIKA-2019 -- clean up -- move state variables to inner classes, convert protected to package private, add @Override on parse
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/2031de70
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/2031de70
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/2031de70
Branch: refs/heads/master
Commit: 2031de70c117fdabf793008fe22dd9c97c82d2c9
Parents: 81279a1
Author: tballison <ta...@mitre.org>
Authored: Fri Jun 24 10:19:59 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Fri Jun 24 10:19:59 2016 -0400
----------------------------------------------------------------------
.../microsoft/xml/AbstractXML2003Parser.java | 45 ++++++++++----------
.../microsoft/xml/SpreadsheetMLParser.java | 3 +-
.../tika/parser/microsoft/xml/WordMLParser.java | 8 ++--
3 files changed, 28 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/2031de70/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/AbstractXML2003Parser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/AbstractXML2003Parser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/AbstractXML2003Parser.java
index 637b4d6..4e05d0e 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/AbstractXML2003Parser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/AbstractXML2003Parser.java
@@ -43,31 +43,31 @@ import java.io.InputStream;
public abstract class AbstractXML2003Parser extends AbstractParser {
- protected final static String MS_OFFICE_PROPERTIES_URN = "urn:schemas-microsoft-com:office:office";
- protected final static String MS_DOC_PROPERTIES_URN = "urn:schemas-microsoft-com:office:office";
- protected final static String MS_SPREADSHEET_URN = "urn:schemas-microsoft-com:office:spreadsheet";
- protected final static String WORD_ML_URL = "http://schemas.microsoft.com/office/word/2003/wordml";
- protected final static Attributes EMPTY_ATTRS = new AttributesImpl();
+ final static String MS_OFFICE_PROPERTIES_URN = "urn:schemas-microsoft-com:office:office";
+ final static String MS_DOC_PROPERTIES_URN = "urn:schemas-microsoft-com:office:office";
+ final static String MS_SPREADSHEET_URN = "urn:schemas-microsoft-com:office:spreadsheet";
+ final static String WORD_ML_URL = "http://schemas.microsoft.com/office/word/2003/wordml";
+ final static Attributes EMPTY_ATTRS = new AttributesImpl();
- protected final static String DOCUMENT_PROPERTIES = "DocumentProperties";
- protected final static String PICT = "pict";
- protected final static String BIN_DATA = "binData";
+ final static String DOCUMENT_PROPERTIES = "DocumentProperties";
+ final static String PICT = "pict";
+ final static String BIN_DATA = "binData";
- protected final static String A = "a";
- protected final static String BODY = "body";
- protected final static String CDATA = "cdata";
- protected final static String DIV = "div";
- protected final static String HREF = "href";
- protected final static String IMG = "img";
- protected final static String P = "p";
- protected final static String TD = "td";
- protected final static String TR = "tr";
- protected final static String TABLE = "table";
- protected final static String TBODY = "tbody";
+ final static String A = "a";
+ final static String BODY = "body";
+ final static String CDATA = "cdata";
+ final static String DIV = "div";
+ final static String HREF = "href";
+ final static String IMG = "img";
+ final static String P = "p";
+ final static String TD = "td";
+ final static String TR = "tr";
+ final static String TABLE = "table";
+ final static String TBODY = "tbody";
- protected final static String HLINK = "hlink";
- protected final static String HLINK_DEST = "dest";
- protected final static String NAME_ATTR = "name";
+ final static String HLINK = "hlink";
+ final static String HLINK_DEST = "dest";
+ final static String NAME_ATTR = "name";
private static ContentHandler getMSPropertiesHandler(
@@ -77,6 +77,7 @@ public abstract class AbstractXML2003Parser extends AbstractParser {
metadata, property);
}
+ @Override
public void parse(
InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
http://git-wip-us.apache.org/repos/asf/tika/blob/2031de70/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
index 0cf7520..c442453 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
@@ -52,8 +52,6 @@ public class SpreadsheetMLParser extends AbstractXML2003Parser {
Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
MEDIA_TYPE)));
- private boolean inBody = false;
-
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
@@ -78,6 +76,7 @@ public class SpreadsheetMLParser extends AbstractXML2003Parser {
StringBuilder buffer = new StringBuilder();
String href = null;
boolean inData = false;
+ private boolean inBody = false;
public ExcelMLHandler(ContentHandler handler) {
this.handler = handler;
http://git-wip-us.apache.org/repos/asf/tika/blob/2031de70/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
index 6bd51da..16b8c46 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
@@ -62,7 +62,6 @@ public class WordMLParser extends AbstractXML2003Parser {
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
MEDIA_TYPE)));
- private boolean inBody = false;
static {
WORDML_TO_XHTML.put(P, P);
@@ -107,6 +106,7 @@ public class WordMLParser extends AbstractXML2003Parser {
private class WordMLHandler extends DefaultHandler {
private final ContentHandler handler;
private boolean ignoreCharacters;
+ private boolean inBody = false;
//use inP to keep track of whether the handler is
//in a paragraph or not. <p><p></p></p> was allowed
@@ -128,7 +128,7 @@ public class WordMLParser extends AbstractXML2003Parser {
}
String html = WORDML_TO_XHTML.get(localName);
if (html != null) {
- if ("p".equals(localName)) {
+ if (P.equals(localName)) {
//close p if already in a p to prevent nested <p>
if (inP) {
handler.endElement(XHTMLContentHandler.XHTML, P, P);
@@ -165,13 +165,13 @@ public class WordMLParser extends AbstractXML2003Parser {
if (html.equals(TABLE)) {
handler.endElement(XHTMLContentHandler.XHTML, TBODY, TBODY);
}
- if ("p".equals(html) && !inP) {
+ if (P.equals(html) && !inP) {
//start p if not already in one to prevent non-matching <p>
handler.startElement(XHTMLContentHandler.XHTML, P, P, EMPTY_ATTRS);
}
handler.endElement(XHTMLContentHandler.XHTML, html, html);
- if ("p".equals(html)) {
+ if (P.equals(html)) {
inP = false;
}
}